Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions .github/workflows/_run_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,34 @@ jobs:
- name: Docker pull image
run: |
docker pull nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }}

# NOTE: under certain circumstances, the checkout action cannot clean up the workspace properly, so
# this workaround is needed to ensure that the workspace is clean by removing all files created by root.
#
# The error observed looked like this from the checkout action:
# Run actions/checkout@v4
# ...
# Deleting the contents of '/home/azureuser/actions-runner/_work/reinforcer/reinforcer'
# Error: File was unable to be removed Error: EACCES: permission denied, rmdir '/home/azureuser/actions-runner/_work/reinforcer/reinforcer/docs/_build/doctest'
- name: Forcefully clean up the repository
run: |
docker run --rm -u root \
-v /home/azureuser/actions-runner/_work/reinforcer/reinforcer:/home/azureuser/actions-runner/_work/reinforcer/reinforcer \
nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }} \
bash -x -c "ls -lah /home/azureuser/actions-runner/_work/reinforcer/reinforcer && shopt -s dotglob && rm -rf /home/azureuser/actions-runner/_work/reinforcer/reinforcer/*"

- name: Checkout repository
uses: actions/checkout@v4

- name: Start container
run: |
docker run --rm -d --name nemo_container_${{ github.run_id }} --runtime=nvidia --gpus all --shm-size=64g \
--env TRANSFORMERS_OFFLINE=0 \
--env HYDRA_FULL_ERROR=1 \
--env HF_HOME=/home/TestData/reinforcer/hf_home \
--env REINFORCER_CI_DIR=/home/TestData/reinforcer \
--env REINFORCER_REPO_DIR=/opt/NeMo-Reinforcer \
--env REINFORCER_REPO_DIR=/opt/reinforcer \
--volume $PWD:/opt/reinforcer \
--volume /mnt/datadrive/TestData/reinforcer/datasets:/opt/reinforcer/datasets:ro \
--volume /mnt/datadrive/TestData/reinforcer/checkpoints:/home/TestData/reinforcer/checkpoints:ro \
--volume /mnt/datadrive/TestData/reinforcer/hf_home/hub:/home/TestData/reinforcer/hf_home/hub \
nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }} \
Expand All @@ -94,6 +113,9 @@ jobs:
set -e

cmd=$(cat <<"RUN_TEST_EOF"
# This is needed since we create virtualenvs in the workspace, so this allows it to be cleaned up if necessary
umask 000

nvidia-smi

# In case git commands need to be run inside Reinforcer
Expand Down
51 changes: 50 additions & 1 deletion .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,20 @@ jobs:
pre-commit install
pre-commit run --all-files --show-diff-on-failure --color=always

sphinx-build:
name: Sphinx build
needs: [pre-flight]
runs-on: ubuntu-latest
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: build docs
run: |
pip install uv
cd docs/
uv run --extra docs sphinx-build . _build/html

build-container:
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
needs: [pre-flight]
Expand All @@ -115,6 +129,20 @@ jobs:
MAX_JOBS=32
REINFORCER_COMMIT=${{ github.sha }}

sphinx-doctest:
name: Sphinx doctest
needs: [build-container, pre-flight]
uses: ./.github/workflows/_run_test.yml
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
with:
RUNNER: self-hosted-azure
TIMEOUT: 10
SCRIPT: |
cd ${REINFORCER_REPO_DIR}/docs
uv run --extra docs sphinx-build -b doctest . _build/doctest
secrets:
HF_TOKEN: ${{ secrets.HF_TOKEN }}

unit-tests:
name: Unit tests
needs: [build-container, pre-flight]
Expand All @@ -125,6 +153,27 @@ jobs:
TIMEOUT: 10
SCRIPT: |
cd ${REINFORCER_REPO_DIR}
uv run bash -x ./tests/run_unit.sh
uv run --extra test bash -x ./tests/run_unit.sh
secrets:
HF_TOKEN: ${{ secrets.HF_TOKEN }}

functional-tests:
name: ${{ matrix.test_case }}
needs: [build-container, pre-flight]
uses: ./.github/workflows/_run_test.yml
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
strategy:
matrix:
test_case:
- sft.sh
- grpo.sh
with:
# TODO: For now, allow these to fail since the checks are not robust.
OPTIONAL: true
RUNNER: self-hosted-azure
TIMEOUT: 8
SCRIPT: |
cd ${REINFORCER_REPO_DIR}
uv run bash ./tests/functional/${{ matrix.test_case }}
secrets:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
41 changes: 32 additions & 9 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,41 @@
ARG BASE_IMAGE=anyscale/ray:2.43.0-py312-cu125
FROM ${BASE_IMAGE}
FROM ${BASE_IMAGE} AS base
# base is just ray + uv with minimal installs so it is a very lightweight container

WORKDIR /opt/NeMo-Reinforcer
# It is more convenient for users to run as root
USER root

RUN sudo apt-get update && sudo apt-get install -y jq
RUN apt-get update && sudo apt-get install -y jq

RUN pip install uv
RUN echo "unset RAY_RUNTIME_ENV_HOOK" >> /home/ray/.bashrc

COPY pyproject.toml .
FROM base AS hermetic
# hermetic creates a virtual environment with the default dependencies pre-installed for convenience

RUN pip install uv && \
uv venv -p python3.12 && \
uv pip install -r pyproject.toml --extra dev --extra test
COPY --chown=ray --chmod=755 pyproject.toml /opt/reinforcer/pyproject.toml
RUN chmod 755 /home/ray/.cache
WORKDIR /opt/reinforcer
RUN uv venv .venv
# uv sync has a more reliable resolver than simple uv pip install which can fail
RUN uv sync --extra test --extra dev --extra docs --no-install-project

COPY . .
ENV VIRTUAL_ENV=/opt/reinforcer/.venv
ENV PATH="/opt/reinforcer/.venv/bin:$PATH"
# The ray images automatically activate the anaconda venv. We will
# comment this out of the .bashrc to give the same UX between docker
# and other clusters like slurm.
RUN <<"EOF"
cp ~/.bashrc ~/.bashrc.backup # backup existing .bashrc

RUN uv pip install -e .
# Comment out the conda initialize block
sed -i '/# >>> conda initialize >>>/,/# <<< conda initialize <<</ { /^[^#]/ s/^/# / }' ~/.bashrc

# Comment out any line that explicitly exports the anaconda3 PATH
sed -i '/export PATH=\$HOME\/anaconda3\/bin:\$PATH/ s/^/# /' ~/.bashrc
EOF

COPY --chown=ray --chmod=755 . /opt/reinforcer
RUN uv pip install --no-deps --editable /opt/reinforcer

FROM base
4 changes: 2 additions & 2 deletions docs/cluster.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export UV_CACHE_DIR=/path/that/all/workers/can/access/uv_cache

```sh
# Run from the root of NeMo-Reinforcer repo
NUM_ACTOR_NODES=1 # Total nodes requested are $NUM_ACTOR_NODES + 1 (+1 for head node)
NUM_ACTOR_NODES=1 # Total nodes requested (head is colocated on ray-worker-0)

COMMAND="bash -c 'uv pip install -e .; uv run ./examples/run_grpo.py'" \
RAY_DEDUP_LOGS=0 \
Expand Down Expand Up @@ -55,7 +55,7 @@ tail -f 1980204-logs/ray-driver.log
To run interactively, launch the same command as the [Batched Job Submission](#batched-job-submission) except omit the `COMMAND` line:
```sh
# Run from the root of NeMo-Reinforcer repo
NUM_ACTOR_NODES=1 # Total nodes requested are $NUM_ACTOR_NODES + 1 (+1 for head node)
NUM_ACTOR_NODES=1 # Total nodes requested (head is colocated on ray-worker-0)

RAY_DEDUP_LOGS=0 \
UV_CACHE_DIR=YOUR_UV_CACHE_DIR \
Expand Down
21 changes: 18 additions & 3 deletions docs/docker.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
# Building Docker Image
# Building Docker Images

## Docker Build
### Base Image
If you only need the base image with ray + uv, you can build it like so:
```sh
cd docker/
docker buildx build -t nemo-reinforcer -f Dockerfile .
docker buildx build -t reinforcer -f Dockerfile ..
```

This is **our recommendation** as it is a small image and allows you to specify your python dependencies at runtime.

### Hermetic Image
We also provide a way to build the docker image with all of the default dependencies pre-installed to get started.
```sh
cd docker/
docker buildx build --target hermetic -t reinforcer -f Dockerfile ..
```

This image sets up the python environment for you, so you do not have to use `uv` if you don't need
any other packages.

This image is useful in situations where you may not have network connectivity to re-download packages.
60 changes: 53 additions & 7 deletions docs/testing.md
Original file line number Diff line number Diff line change
@@ -1,24 +1,70 @@
# Testing NeMo-Reinforcer
# Testing Reinforcer

## Unit Tests

:::{important}
Unit tests require 2 GPUs to test the full suite.
:::

```sh
# Install the project and the test dependencies
uv pip install -e '.[test]'

# Run the unit tests using local GPUs
uv run bash tests/run_unit.sh
```

### Run Unit Tests Hermetic
:::{note}
Tests can also be run on SLURM with `ray.sub`, but note that some tests will be skipped
due to no GPUs being located on the head node. To run the full suite of tests, please
launch on a regular GPU allocation.
:::

If your local environment does not have all the necessary dependencies (e.g., `gcc`, `nvcc`)
or there is concern that something in your environment may be misconfigured, you can also run
the tests in docker with this script:
### Running Unit Tests in a Hermetic Environment

For environments lacking the necessary dependencies (e.g., `gcc`, `nvcc`),
or where the environment's configuration may be problematic, tests can be run
in docker with this script:

```sh
CONTAINER=... bash tests/run_unit_in_docker.sh
```

The `CONTAINER` can be built by following the instructions [here](docker.md).
The required `CONTAINER` can be built by following the instructions in the [docker documentation](docker.md).

## Functional tests

TBD
:::{important}
Functional tests may require multiple GPUs to run. See each script to understand the requirements.
:::

Functional tests are located under `tests/functional/`.

```sh
# Install the project and the test dependencies
uv pip install -e '.[test]'
# Run the functional test for sft
uv run bash tests/functional/sft.sh
```

At the end of each functional test, the metric checks will be printed as well as
whether they pass or fail. Here is an example:

```text
Metric Checks
┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃ Status ┃ Check ┃ Value ┃ Message ┃
┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
│ PASS │ data["train/loss"]["9"] < 1500 │ 817.4517822265625 │ │
└────────┴────────────────────────────────┴───────────────────┴─────────┘
```

### Running Functional Tests in a Hermetic Environment

For environments lacking the necessary dependencies (e.g., `gcc`, `nvcc`),
or where the environment's configuration may be problematic, tests can be run
in docker with this script:

```sh
CONTAINER=... bash run_functional_in_docker.sh functional/sft.sh
```
4 changes: 3 additions & 1 deletion examples/run_grpo_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,9 @@ def main():
args, overrides = parse_args()

if not args.config:
args.config = os.path.join(os.path.dirname(__file__), "configs", "grpo_math_1B.yaml")
args.config = os.path.join(
os.path.dirname(__file__), "configs", "grpo_math_1B.yaml"
)

config = load_config(args.config)
print(f"Loaded configuration from: {args.config}")
Expand Down
4 changes: 3 additions & 1 deletion nemo_reinforcer/algorithms/grpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,9 @@ def grpo_train(

# Run grpo training (single-turn)
for batch in dataloader:
print(f"\n{'=' * 25} Step {step + 1}/{min(len(dataloader), master_config['grpo']['max_num_steps'])} {'=' * 25}")
print(
f"\n{'=' * 25} Step {step + 1}/{min(len(dataloader), master_config['grpo']['max_num_steps'])} {'=' * 25}"
)

with timer.time("total_step_time"):
# Prepare batch
Expand Down
32 changes: 31 additions & 1 deletion nemo_reinforcer/utils/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,30 @@ def log_hyperparams(self, params: Dict[str, Any]) -> None:


def flatten_dict(d: Dict[str, Any], sep: str = ".") -> Dict[str, Any]:
"""Flatten a nested dictionary."""
"""Flatten a nested dictionary.

Handles nested dictionaries and lists by creating keys with separators.
For lists, the index is used as part of the key.

Args:
d: Dictionary to flatten
sep: Separator to use between nested keys

Returns:
Flattened dictionary with compound keys

Examples:
```{doctest}
>>> flatten_dict({"a": 1, "b": {"c": 2}})
{'a': 1, 'b.c': 2}

>>> flatten_dict({"a": [1, 2], "b": {"c": [3, 4]}})
{'a.0': 1, 'a.1': 2, 'b.c.0': 3, 'b.c.1': 4}

>>> flatten_dict({"a": [{"b": 1}, {"c": 2}]})
{'a.0.b': 1, 'a.1.c': 2}
```
"""
result = {}

def _flatten(d, parent_key=""):
Expand All @@ -198,6 +221,13 @@ def _flatten(d, parent_key=""):

if isinstance(value, dict):
_flatten(value, new_key)
elif isinstance(value, list):
for i, item in enumerate(value):
list_key = f"{new_key}{sep}{i}"
if isinstance(item, dict):
_flatten(item, list_key)
else:
result[list_key] = item
else:
result[new_key] = value

Expand Down
Loading