Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions .github/workflows/_run_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,34 @@ jobs:
- name: Docker pull image
run: |
docker pull nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }}

# NOTE: under certain circumstances, the checkout action cannot clean up the workspace properly, so
# this workaround is needed to ensure that the workspace is clean by removing all files created by root.
#
# The error observed looked like this from the checkout action:
# Run actions/checkout@v4
# ...
# Deleting the contents of '/home/azureuser/actions-runner/_work/reinforcer/reinforcer'
# Error: File was unable to be removed Error: EACCES: permission denied, rmdir '/home/azureuser/actions-runner/_work/reinforcer/reinforcer/docs/_build/doctest'
- name: Forcefully clean up the repository
run: |
docker run --rm -u root \
-v /home/azureuser/actions-runner/_work/reinforcer/reinforcer:/home/azureuser/actions-runner/_work/reinforcer/reinforcer \
nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }} \
bash -x -c "ls -lah /home/azureuser/actions-runner/_work/reinforcer/reinforcer && shopt -s dotglob && rm -rf /home/azureuser/actions-runner/_work/reinforcer/reinforcer/*"

- name: Checkout repository
uses: actions/checkout@v4

- name: Start container
run: |
docker run --rm -d --name nemo_container_${{ github.run_id }} --runtime=nvidia --gpus all --shm-size=64g \
--env TRANSFORMERS_OFFLINE=0 \
--env HYDRA_FULL_ERROR=1 \
--env HF_HOME=/home/TestData/reinforcer/hf_home \
--env REINFORCER_CI_DIR=/home/TestData/reinforcer \
--env REINFORCER_REPO_DIR=/opt/NeMo-Reinforcer \
--env REINFORCER_REPO_DIR=/opt/reinforcer \
--volume $PWD:/opt/reinforcer \
--volume /mnt/datadrive/TestData/reinforcer/datasets:/opt/reinforcer/datasets:ro \
--volume /mnt/datadrive/TestData/reinforcer/checkpoints:/home/TestData/reinforcer/checkpoints:ro \
--volume /mnt/datadrive/TestData/reinforcer/hf_home/hub:/home/TestData/reinforcer/hf_home/hub \
nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }} \
Expand All @@ -94,6 +113,9 @@ jobs:
set -e

cmd=$(cat <<"RUN_TEST_EOF"
# This is needed since we create virtualenvs in the workspace, so this allows it to be cleaned up if necessary
umask 000

nvidia-smi

# In case git commands need to be run inside Reinforcer
Expand Down
51 changes: 50 additions & 1 deletion .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,20 @@ jobs:
pre-commit install
pre-commit run --all-files --show-diff-on-failure --color=always

sphinx-build:
name: Sphinx build
needs: [pre-flight]
runs-on: ubuntu-latest
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: build docs
run: |
pip install uv
cd docs/
uv run --extra docs sphinx-build . _build/html

build-container:
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
needs: [pre-flight]
Expand All @@ -115,6 +129,20 @@ jobs:
MAX_JOBS=32
REINFORCER_COMMIT=${{ github.sha }}

sphinx-doctest:
name: Sphinx doctest
needs: [build-container, pre-flight]
uses: ./.github/workflows/_run_test.yml
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
with:
RUNNER: self-hosted-azure
TIMEOUT: 10
SCRIPT: |
cd ${REINFORCER_REPO_DIR}/docs
uv run --extra docs sphinx-build -b doctest . _build/doctest
secrets:
HF_TOKEN: ${{ secrets.HF_TOKEN }}

unit-tests:
name: Unit tests
needs: [build-container, pre-flight]
Expand All @@ -125,6 +153,27 @@ jobs:
TIMEOUT: 10
SCRIPT: |
cd ${REINFORCER_REPO_DIR}
uv run bash -x ./tests/run_unit.sh
uv run --extra test bash -x ./tests/run_unit.sh
secrets:
HF_TOKEN: ${{ secrets.HF_TOKEN }}

functional-tests:
name: ${{ matrix.test_case }}
needs: [build-container, pre-flight]
uses: ./.github/workflows/_run_test.yml
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
strategy:
matrix:
test_case:
- sft.sh
- grpo.sh
with:
# TODO: For now, allow these to fail since the checks are not robust.
OPTIONAL: true
RUNNER: self-hosted-azure
TIMEOUT: 8
SCRIPT: |
cd ${REINFORCER_REPO_DIR}
uv run bash ./tests/functional/${{ matrix.test_case }}
secrets:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
41 changes: 32 additions & 9 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,41 @@
ARG BASE_IMAGE=anyscale/ray:2.43.0-py312-cu125
FROM ${BASE_IMAGE}
FROM ${BASE_IMAGE} AS base
# base is just ray + uv with minimal installs so it is a very lightweight container

WORKDIR /opt/NeMo-Reinforcer
# It is more convenient for users to run as root
USER root

RUN sudo apt-get update && sudo apt-get install -y jq
RUN apt-get update && sudo apt-get install -y jq

RUN pip install uv
RUN echo "unset RAY_RUNTIME_ENV_HOOK" >> /home/ray/.bashrc

COPY pyproject.toml .
FROM base AS hermetic
# hermetic creates a virtual environment with the default dependencies pre-installed for convenience

RUN pip install uv && \
uv venv -p python3.12 && \
uv pip install -r pyproject.toml --extra dev --extra test
COPY --chown=ray --chmod=755 pyproject.toml /opt/reinforcer/pyproject.toml
RUN chmod 755 /home/ray/.cache
WORKDIR /opt/reinforcer
RUN uv venv .venv
# uv sync has a more reliable resolver than simple uv pip install which can fail
RUN uv sync --extra test --extra dev --extra docs --no-install-project

COPY . .
ENV VIRTUAL_ENV=/opt/reinforcer/.venv
ENV PATH="/opt/reinforcer/.venv/bin:$PATH"
# The ray images automatically activate the anaconda venv. We will
# comment this out of the .bashrc to give the same UX between docker
# and other clusters like slurm.
RUN <<"EOF"
cp ~/.bashrc ~/.bashrc.backup # backup existing .bashrc

RUN uv pip install -e .
# Comment out the conda initialize block
sed -i '/# >>> conda initialize >>>/,/# <<< conda initialize <<</ { /^[^#]/ s/^/# / }' ~/.bashrc

# Comment out any line that explicitly exports the anaconda3 PATH
sed -i '/export PATH=\$HOME\/anaconda3\/bin:\$PATH/ s/^/# /' ~/.bashrc
EOF

COPY --chown=ray --chmod=755 . /opt/reinforcer
RUN uv pip install --no-deps --editable /opt/reinforcer

FROM base
4 changes: 2 additions & 2 deletions docs/cluster.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export UV_CACHE_DIR=/path/that/all/workers/can/access/uv_cache

```sh
# Run from the root of NeMo-Reinforcer repo
NUM_ACTOR_NODES=1 # Total nodes requested are $NUM_ACTOR_NODES + 1 (+1 for head node)
NUM_ACTOR_NODES=1 # Total nodes requested (head is colocated on ray-worker-0)

COMMAND="bash -c 'uv pip install -e .; uv run ./examples/run_grpo.py'" \
RAY_DEDUP_LOGS=0 \
Expand Down Expand Up @@ -55,7 +55,7 @@ tail -f 1980204-logs/ray-driver.log
To run interactively, launch the same command as the [Batched Job Submission](#batched-job-submission) except omit the `COMMAND` line:
```sh
# Run from the root of NeMo-Reinforcer repo
NUM_ACTOR_NODES=1 # Total nodes requested are $NUM_ACTOR_NODES + 1 (+1 for head node)
NUM_ACTOR_NODES=1 # Total nodes requested (head is colocated on ray-worker-0)

RAY_DEDUP_LOGS=0 \
UV_CACHE_DIR=YOUR_UV_CACHE_DIR \
Expand Down
21 changes: 18 additions & 3 deletions docs/docker.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
# Building Docker Image
# Building Docker Images

## Docker Build
### Base Image
If you only need the base image with ray + uv, you can build it like so:
```sh
cd docker/
docker buildx build -t nemo-reinforcer -f Dockerfile .
docker buildx build -t reinforcer -f Dockerfile ..
```

This is **our recommendation** as it is a small image and allows you to specify your python dependencies at runtime.

### Hermetic Image
We also provide a way to build the docker image with all of the default dependencies pre-installed to get started.
```sh
cd docker/
docker buildx build --target hermetic -t reinforcer -f Dockerfile ..
```

This image sets up the python environment for you, so you do not have to use `uv` if you don't need
any other packages.

This image is useful in situations where you may not have network connectivity to re-download packages.
60 changes: 53 additions & 7 deletions docs/testing.md
Original file line number Diff line number Diff line change
@@ -1,24 +1,70 @@
# Testing NeMo-Reinforcer
# Testing Reinforcer

## Unit Tests

:::{important}
Unit tests require 2 GPUs to test the full suite.
:::

```sh
# Install the project and the test dependencies
uv pip install -e '.[test]'

# Run the unit tests using local GPUs
uv run bash tests/run_unit.sh
```

### Run Unit Tests Hermetic
:::{note}
Tests can also be run on SLURM with `ray.sub`, but note that some tests will be skipped
due to no GPUs being located on the head node. To run the full suite of tests, please
launch on a regular GPU allocation.
:::

If your local environment does not have all the necessary dependencies (e.g., `gcc`, `nvcc`)
or there is concern that something in your environment may be misconfigured, you can also run
the tests in docker with this script:
### Running Unit Tests in a Hermetic Environment

For environments lacking the necessary dependencies (e.g., `gcc`, `nvcc`),
or where the environment's configuration may be problematic, tests can be run
in docker with this script:

```sh
CONTAINER=... bash tests/run_unit_in_docker.sh
```

The `CONTAINER` can be built by following the instructions [here](docker.md).
The required `CONTAINER` can be built by following the instructions in the [docker documentation](docker.md).

## Functional tests

TBD
:::{important}
Functional tests may require multiple GPUs to run. See each script to understand the requirements.
:::

Functional tests are located under `tests/functional/`.

```sh
# Install the project and the test dependencies
uv pip install -e '.[test]'
# Run the functional test for sft
uv run bash tests/functional/sft.sh
```

At the end of each functional test, the metric checks will be printed as well as
whether they pass or fail. Here is an example:

```text
Metric Checks
┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃ Status ┃ Check ┃ Value ┃ Message ┃
┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
│ PASS │ data["train/loss"]["9"] < 1500 │ 817.4517822265625 │ │
└────────┴────────────────────────────────┴───────────────────┴─────────┘
```

### Running Functional Tests in a Hermetic Environment

For environments lacking the necessary dependencies (e.g., `gcc`, `nvcc`),
or where the environment's configuration may be problematic, tests can be run
in docker with this script:

```sh
CONTAINER=... bash run_functional_in_docker.sh functional/sft.sh
```
4 changes: 3 additions & 1 deletion examples/run_grpo_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,9 @@ def main():
args, overrides = parse_args()

if not args.config:
args.config = os.path.join(os.path.dirname(__file__), "configs", "grpo_math_1B.yaml")
args.config = os.path.join(
os.path.dirname(__file__), "configs", "grpo_math_1B.yaml"
)

config = load_config(args.config)
print(f"Loaded configuration from: {args.config}")
Expand Down
4 changes: 3 additions & 1 deletion nemo_reinforcer/algorithms/grpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,9 @@ def grpo_train(

# Run grpo training (single-turn)
for batch in dataloader:
print(f"\n{'=' * 25} Step {step + 1}/{min(len(dataloader), master_config['grpo']['max_num_steps'])} {'=' * 25}")
print(
f"\n{'=' * 25} Step {step + 1}/{min(len(dataloader), master_config['grpo']['max_num_steps'])} {'=' * 25}"
)

with timer.time("total_step_time"):
# Prepare batch
Expand Down
32 changes: 31 additions & 1 deletion nemo_reinforcer/utils/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,30 @@ def log_hyperparams(self, params: Dict[str, Any]) -> None:


def flatten_dict(d: Dict[str, Any], sep: str = ".") -> Dict[str, Any]:
"""Flatten a nested dictionary."""
"""Flatten a nested dictionary.

Handles nested dictionaries and lists by creating keys with separators.
For lists, the index is used as part of the key.

Args:
d: Dictionary to flatten
sep: Separator to use between nested keys

Returns:
Flattened dictionary with compound keys

Examples:
```{doctest}
>>> flatten_dict({"a": 1, "b": {"c": 2}})
{'a': 1, 'b.c': 2}

>>> flatten_dict({"a": [1, 2], "b": {"c": [3, 4]}})
{'a.0': 1, 'a.1': 2, 'b.c.0': 3, 'b.c.1': 4}

>>> flatten_dict({"a": [{"b": 1}, {"c": 2}]})
{'a.0.b': 1, 'a.1.c': 2}
```
"""
result = {}

def _flatten(d, parent_key=""):
Expand All @@ -198,6 +221,13 @@ def _flatten(d, parent_key=""):

if isinstance(value, dict):
_flatten(value, new_key)
elif isinstance(value, list):
for i, item in enumerate(value):
list_key = f"{new_key}{sep}{i}"
if isinstance(item, dict):
_flatten(item, list_key)
else:
result[list_key] = item
else:
result[new_key] = value

Expand Down
Loading