From a1fe428b5269497093f5b01ea8bf17b0e4b468c8 Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu
Date: Tue, 25 Mar 2025 15:10:53 -0700
Subject: [PATCH 01/27] WIP commit

Signed-off-by: Yi-Fu Wu
---
 examples/configs/grpo_math_1B.yaml           |  15 +-
 examples/run_grpo_math.py                    |   3 +-
 nemo_reinforcer/distributed/worker_groups.py |   1 +
 nemo_reinforcer/models/policy/hf_policy.py   | 214 +++++++++++++++++--
 4 files changed, 209 insertions(+), 24 deletions(-)

diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml
index 72aad000ce..a8149156e9 100644
--- a/examples/configs/grpo_math_1B.yaml
+++ b/examples/configs/grpo_math_1B.yaml
@@ -5,7 +5,7 @@ grpo:
   max_num_steps: 1000000
   normalize_rewards: true
   use_leave_one_out_baseline: true
-  val_period: 10
+  val_period: -1
   val_at_start: false
   max_val_samples: 256
   val_batch_size: 256
@@ -15,7 +15,7 @@ loss_fn:
   ratio_eps: 0.2
 
 checkpointing:
-  enabled: true
+  enabled: false
   checkpoint_dir: "results/grpo"
   metric_name: "val_reward"
   higher_is_better: true
@@ -29,7 +29,8 @@ policy:
   generation_batch_size: 32
   logprob_batch_size: 4
   max_total_sequence_length: 512
-  precision: "bfloat16"
+  # precision: "bfloat16"
+  precision: "float32"
 
   optimizer:
     name: "torch.optim.AdamW"
@@ -75,17 +76,17 @@ env:
 logger:
   log_dir: "logs" # Base directory for all logs
   num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal
-  wandb_enabled: false
+  wandb_enabled: true
   tensorboard_enabled: false
   monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard
   wandb:
-    project: "grpo-dev"
-    name: "grpo-dev-logger"
+    project: "grpo-dev-yifu"
+    name: "grpo-dev-offload2"
   tensorboard: {}
   gpu_monitoring:
     collection_interval: 10 # How often to collect GPU usage metrics (in seconds)
     flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds)
 
 cluster:
-  gpus_per_node: 1
+  gpus_per_node: 2
   num_nodes: 1
 
diff --git a/examples/run_grpo_math.py b/examples/run_grpo_math.py
index bc11687ab0..d3cb3fd805 100644
--- a/examples/run_grpo_math.py
+++ b/examples/run_grpo_math.py
@@ -23,7 +23,7 @@
 from transformers import AutoTokenizer
 from collections import defaultdict
 
-from nemo_reinforcer.algorithms.grpo import MasterConfig, grpo_train, setup
+from nemo_reinforcer.algorithms.grpo import MasterConfig, grpo_train, setup, refit_policy_generation
 from nemo_reinforcer.distributed.virtual_cluster import init_ray
 from nemo_reinforcer.utils.config import load_config
 from nemo_reinforcer.utils.logger import get_next_experiment_dir
@@ -271,6 +271,7 @@ def main():
         grpo_state,
         master_config,
     ) = setup(config, dataset, val_dataset)
+    # refit_policy_generation(policy, policy_generation)
     grpo_train(
         policy,
         policy_generation,
diff --git a/nemo_reinforcer/distributed/worker_groups.py b/nemo_reinforcer/distributed/worker_groups.py
index 9cc7798631..93d4808b75 100644
--- a/nemo_reinforcer/distributed/worker_groups.py
+++ b/nemo_reinforcer/distributed/worker_groups.py
@@ -125,6 +125,7 @@ def __call__(
             num_gpus=num_gpus,
             bundle_indices=bundle_indices,
         )
+        print("HERE worker_class", worker_class, "env_vars", env_vars)
 
         # Apply resource configuration
         if resources and "num_gpus" in resources:
diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py
index 34a621063d..6478eda6e2 100644
--- a/nemo_reinforcer/models/policy/hf_policy.py
+++ b/nemo_reinforcer/models/policy/hf_policy.py
@@ -21,6 +21,8 @@
 import ray
 import torch
 from torch.distributed.device_mesh import init_device_mesh
+from torch.distributed.fsdp.api import CPUOffload
+from torch.distributed.fsdp._runtime_utils import _lazy_init
 from torch.distributed.fsdp import (
     FullyShardedDataParallel,
     FullStateDictConfig,
@@ -46,7 +48,163 @@
 from nemo_reinforcer.distributed.virtual_cluster import (
     PY_EXECUTABLES,
 )
-
+# def move_to_cpu(model):
+#     return model.to("cpu")
+
+# def move_to_cpu(model):
+#     for param in model.parameters():
+#         param.data = param.data.to("cpu")
+#     for buffer in model.buffers():
+#         buffer.data = buffer.data.to("cpu")
+#     if hasattr(model, "_fsdp_wrapped_module"):
+#         model._fsdp_wrapped_module.to("cpu")
+
+#     return model
+
+# def move_to_gpu(model):
+#     return model.to("cuda")
+
+def move_to_cpu(model):
+    # offload2
+    # if not isinstance(model, FullyShardedDataParallel):
+    #     return model.to("cpu")
+
+    for _, param in model.named_parameters():
+        param.data = param.data.to("cpu", non_blocking=True)
+        if hasattr(param, "_local_shard"):
+            # param._local_shard = param._local_shard.to("cpu", non_blocking=True)
+            param._local_shard = param.data
+        # param.data = param.data.to("cpu", non_blocking=True)
+        if param.grad is not None:
+            param.grad = param.grad.to("cpu", non_blocking=True)
+
+    # for buffer in model.buffers():
+    #     buffer.data = buffer.data.to("cpu")
+    # if torch.distributed.get_rank() == 0:
+    #     import pdb; pdb.set_trace()
+    if hasattr(model, "_fsdp_wrapped_module"):
+        move_to_cpu(model._fsdp_wrapped_module)
+        # model._fsdp_wrapped_module.to("cpu")
+
+    return model
+
+def move_to_gpu(model):
+    # offload2
+    # if not isinstance(model, FullyShardedDataParallel):
+    #     return model.to("cuda")
+
+    for _, param in model.named_parameters():
+        param.data = param.data.to("cuda", non_blocking=True)
+        if hasattr(param, "_local_shard"):
+            # param._local_shard = param._local_shard.to("cuda", non_blocking=True)
+            param._local_shard = param.data
+        # param.data = param.data.to("cuda", non_blocking=True)
+        if param.grad is not None:
+            param.grad = param.grad.to("cuda", non_blocking=True)
+
+    # for buffer in model.buffers():
+    #     buffer.data = buffer.data.to("cuda")
+    if hasattr(model, "_fsdp_wrapped_module"):
+        move_to_gpu(model._fsdp_wrapped_module)
+
+    return model
+
+
+# def move_to_cpu(model):
+#     if not isinstance(model, FullyShardedDataParallel):
+#         model = model.to("cpu")
+#         return model
+#     # lazy init FSDP model
+#     _lazy_init(model, model)
+#     # assert model._is_root, f"Only support root model offloading to CPU"
+#     # # if torch.distributed.get_rank() == 0:
+#     # #     import ipdb; ipdb.set_trace()
+#     for handle in model._all_handles:
+#         if handle._offload_params:
+#             continue
+#         flat_param = handle.flat_param
+#         # assert flat_param.data.data_ptr() == flat_param._local_shard.data_ptr() and \
+#         #     id(flat_param.data) != id(flat_param._local_shard) and \
+#         #     flat_param.data.size() == flat_param._local_shard.size()
+#         handle.flat_param_to(torch.device("cpu"), non_blocking=True)
+#         # the following still keeps id(._local_shard) != id(.data)
+#         flat_param._local_shard = flat_param.data
+#         # assert id(flat_param._local_shard) != id(flat_param.data)
+#     return model

+# def move_to_gpu(model):
+#     if not isinstance(model, FullyShardedDataParallel):
+#         model = model.to("cuda")
+#         return model
+#     # lazy init FSDP model
+#     _lazy_init(model, model)
+#     # assert model._is_root, f"Only support root model loading to GPU"
+#     device_id = torch.cuda.current_device()
+#     for handle in model._all_handles:
+#         if handle._offload_params:
+#             continue
+#         flat_param = handle.flat_param
+#         handle.flat_param_to(torch.device(f"cuda:{device_id}"), non_blocking=True)
+#         # the following still keeps id(._local_shard) != id(.data)
+#         flat_param._local_shard = flat_param.data
+#     return model
+
+
+# def move_to_cpu(model):
+#     if not isinstance(model, FullyShardedDataParallel):
+#         model = model.to("cpu")
+#         return model
+#     # lazy init FSDP model
+#     _lazy_init(model, model)
+#     assert model._is_root, f"Only support root model offloading to CPU"
+#     # if torch.distributed.get_rank() == 0:
+#     #     import ipdb; ipdb.set_trace()
+#     for handle in model._all_handles:
+#         if handle._offload_params:
+#             continue
+#         flat_param = handle.flat_param
+#         assert flat_param.data.data_ptr() == flat_param._local_shard.data_ptr() and \
+#             id(flat_param.data) != id(flat_param._local_shard) and \
+#             flat_param.data.size() == flat_param._local_shard.size()
+#         handle.flat_param_to(torch.device("cpu"), non_blocking=True)
+#         # the following still keeps id(._local_shard) != id(.data)
+#         flat_param._local_shard = flat_param.data
+#         assert id(flat_param._local_shard) != id(flat_param.data)
+
+#     for param in model.parameters():
+#         param.data = param.data.to("cpu")
+#     for buffer in model.buffers():
+#         buffer.data = buffer.data.to("cpu")
+#     if hasattr(model, "_fsdp_wrapped_module"):
+#         move_to_cpu(model._fsdp_wrapped_module)
+#         # model._fsdp_wrapped_module.to("cpu")
+
+#     return model
+
+# def move_to_gpu(model):
+#     if not isinstance(model, FullyShardedDataParallel):
+#         model = model.to("cuda")
+#         return model
+#     # lazy init FSDP model
+#     _lazy_init(model, model)
+#     assert model._is_root, f"Only support root model loading to GPU"
+#     device_id = torch.cuda.current_device()
+#     for handle in model._all_handles:
+#         if handle._offload_params:
+#             continue
+#         flat_param = handle.flat_param
+#         handle.flat_param_to(torch.device(f"cuda:{device_id}"), non_blocking=True)
+#         # the following still keeps id(._local_shard) != id(.data)
+#         flat_param._local_shard = flat_param.data
+
+#     for param in model.parameters():
+#         param.data = param.data.to("cuda")
+#     for buffer in model.buffers():
+#         buffer.data = buffer.data.to("cuda")
+#     if hasattr(model, "_fsdp_wrapped_module"):
+#         move_to_gpu(model._fsdp_wrapped_module)
+
+#     return model
 
 @ray.remote
 class HfPolicyWorker:
@@ -104,6 +262,15 @@ def __init__(
         # (Initialize device mesh, shard submodules, then shard entire model)
         # ------------------------------------------------
 
+        @staticmethod
+        def configure_worker(
+            num_gpus: int | float, bundle_indices: Optional[list] = None
+        ) -> tuple[dict, dict, dict]:
+            env_vars = {
+                "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"
+            }
+            return None, env_vars, None
+
         def do_fsdp(model):
             # Create a device mesh with 'world_size' GPUs in a 1D arrangement.
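             # A rough sketch of the intent, assuming a single node with
             # world_size == 2: init_device_mesh("cuda", (2,)) builds a 1-D
             # mesh over ranks 0-1, and FSDP then shards each auto-wrapped
             # submodule's flat parameter across that mesh, all-gathering
             # shards only around each forward/backward pass.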
             mesh = init_device_mesh("cuda", (world_size,))
@@ -113,20 +280,31 @@ def do_fsdp(model):
                 buffer_dtype=torch.float32,
             )
 
+            # return FullyShardedDataParallel(
+            #     model,
+            #     device_mesh=mesh,
+            #     auto_wrap_policy=size_based_auto_wrap_policy,
+            # )
             return FullyShardedDataParallel(
                 model,
                 device_mesh=mesh,
                 auto_wrap_policy=size_based_auto_wrap_policy,
                 mixed_precision=mp_policy,
+                # cpu_offload=CPUOffload(offload_params=True),
             )
 
-        self.model.to("cuda")
+        # self.model.to("cuda")
+        self.model = move_to_gpu(self.model)
         self.model = do_fsdp(self.model)
-        self.model = self.move_to_cpu(self.model)
-        self.reference_model.to("cuda")
+        self.model = move_to_cpu(self.model)
+        # self.reference_model.to("cuda")
+
+        self.reference_model = move_to_gpu(self.reference_model)
         self.reference_model = do_fsdp(self.reference_model)
-        self.reference_model = self.move_to_cpu(self.reference_model)
-        self.model.to("cuda")
+        self.reference_model = move_to_cpu(self.reference_model)
+
+        # self.model.to("cuda")
+        self.model = move_to_gpu(self.model)
         self._held_reference_model_params = None
         # register_fsdp_forward_method(self.model, "generate")
         if init_optimizer:
@@ -422,8 +600,9 @@ def use_reference_model(self):
             original_model = self.model
             original_reference_model = self.reference_model
 
-            self.model = self.move_to_cpu(self.model)
-            self.reference_model = self.reference_model.to("cuda")
+            self.model = move_to_cpu(self.model)
+            # self.reference_model = self.reference_model.to("cuda")
+            self.reference_model = move_to_gpu(self.reference_model)
 
             # Swap the references
             self.model, self.reference_model = self.reference_model, self.model
@@ -436,8 +615,9 @@ def use_reference_model(self):
 
         finally:
             # Restore original references and device placement
-            self.reference_model = self.move_to_cpu(original_reference_model)
-            self.model = original_model.to("cuda")
+            self.reference_model = move_to_cpu(original_reference_model)
+            # self.model = original_model.to("cuda")
+            self.model = move_to_gpu(original_model)
             gc.collect()
             torch.cuda.empty_cache()
 
@@ -704,19 +884,21 @@ def get_weight_ipc_handles(self, offload_model=True):
             data[name] = reduce_tensor(p.detach())
 
         if offload_model:
-            self.model = self.move_to_cpu(self.model)
+            self.model = move_to_cpu(self.model)
             gc.collect()
             torch.cuda.empty_cache()
 
         return {self.device_uuid: data}
 
     def prepare_for_lp_inference(self):
-        self.model.to("cuda")
+        # self.model.to("cuda")
+        self.model = move_to_gpu(self.model)
         self.model.eval()
         self.offload_before_refit()
 
     def prepare_for_training(self, *args, **kwargs):
         # onload models and optimizer state to cuda
-        self.model.to("cuda")
+        # self.model.to("cuda")
+        self.model = move_to_gpu(self.model)
         self.model.train()
 
         # Move optimizer state to CUDA if it exists
@@ -754,7 +936,7 @@ def offload_before_refit(self):
     @torch.no_grad()
     def offload_after_refit(self):
         # Offload as much as possible on the CPU
-        self.model = self.move_to_cpu(self.model)
+        self.model = move_to_cpu(self.model)
         self.model.eval()
         torch.randn(1).cuda()  # wake up torch allocator
         self.offload_before_refit()  # rerun the old offload function
@@ -771,8 +953,8 @@ def offload_after_refit(self):
         print(
             f"GPU Memory after refit complete: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved"
         )
-
-    def move_to_cpu(self, model):
+
+    def move_to_cpu_old1(self, model):
         for param in model.parameters():
             param.data = param.data.to("cpu")
 
From 39cd9561d7224e5da673faf899b5233a58826897 Mon Sep 17 00:00:00 2001
From: Parth Chadha
Date: Tue, 25 Mar 2025 17:12:24 -0700
Subject: [PATCH 02/27] fix: Remove reference of tokenizer from generation
 backend (#75) (#82)

Signed-off-by: Parth Chadha
---
 docs/design_docs/generation.md                | 14 +++++-
 examples/run_grpo_math.py                     |  5 ++-
 nemo_reinforcer/algorithms/grpo.py            |  7 +++
 .../models/generation/interfaces.py           |  4 +-
 nemo_reinforcer/models/generation/vllm.py     | 33 ++++++++++----
 .../models/generation/test_vllm_generation.py | 44 ++++++++++++++++++-
 6 files changed, 91 insertions(+), 16 deletions(-)

diff --git a/docs/design_docs/generation.md b/docs/design_docs/generation.md
index e9fa3ee2ee..8dda8a028a 100644
--- a/docs/design_docs/generation.md
+++ b/docs/design_docs/generation.md
@@ -95,20 +95,30 @@ The {py:class}`UpdatableVllmInternalWorker
 Tuple[
diff --git a/nemo_reinforcer/algorithms/grpo.py b/nemo_reinforcer/algorithms/grpo.py
--- a/nemo_reinforcer/algorithms/grpo.py
+++ b/nemo_reinforcer/algorithms/grpo.py
@@ -219,6 +220,12 @@ def setup(
     # vllm model loading prefers clean environment, initialize policy_generation before policy (#52 will fix this)
     backend = generation_config["backend"]
     generation_config["model_name"] = policy_config["model_name"]  # Needed for vLLM
+    generation_config["vllm_cfg"]["skip_tokenizer_init"] = True
+    # When https://github.com/NVIDIA/reinforcer/issues/57 is fixed, we should update stop_token_ids below.
+    generation_config["stop_token_ids"] = [tokenizer.eos_token_id]
+    generation_config["pad_token"] = tokenizer.pad_token_id
+    generation_config["vllm_cfg"]["load_format"] = "dummy"
+
     if backend == "hf":
         policy_generation = None
         print(f"  ✓ Using HF backend for generation with {policy_config['model_name']}")
diff --git a/nemo_reinforcer/models/generation/interfaces.py b/nemo_reinforcer/models/generation/interfaces.py
index 8ffa1d2945..138b70fbc1 100644
--- a/nemo_reinforcer/models/generation/interfaces.py
+++ b/nemo_reinforcer/models/generation/interfaces.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from abc import ABC, abstractmethod
-from typing import Any, TypedDict, Union, Tuple
+from typing import Any, TypedDict, Union, Tuple, List
 
 import torch
 
 from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict
@@ -101,6 +101,8 @@ class GenerationConfig(TypedDict):
     top_p: float
     top_k: int
     model_name: str
+    stop_token_ids: List[int]
+    pad_token: int
 
 
 class GenerationDatumSpec(TypedDict):
diff --git a/nemo_reinforcer/models/generation/vllm.py b/nemo_reinforcer/models/generation/vllm.py
index ebbe53cff5..cb60b7fe8c 100644
--- a/nemo_reinforcer/models/generation/vllm.py
+++ b/nemo_reinforcer/models/generation/vllm.py
@@ -39,6 +39,9 @@ class VllmSpecificArgs(TypedDict):
     tensor_parallel_size: int
     gpu_memory_utilization: float
     max_model_len: int
+    # Additional arguments for vLLM inserted by reinforcer based on the context of when vllm is used
+    skip_tokenizer_init: bool
+    load_format: str
 
 
 class VllmConfig(GenerationConfig):
@@ -110,6 +113,7 @@ def __init__(
                 Only needed for the first worker in each tied worker group.
         """
         self.cfg = config
+
         self.model_name = self.cfg["model_name"]
         self.tensor_parallel_size = self.cfg["vllm_cfg"]["tensor_parallel_size"]
         self.gpu_memory_utilization = self.cfg["vllm_cfg"]["gpu_memory_utilization"]
@@ -166,9 +170,11 @@ def __init__(
 
         self.llm = LLM(
             model=self.model_name,
-            load_format="dummy",
-            tensor_parallel_size=self.tensor_parallel_size,
-            gpu_memory_utilization=self.gpu_memory_utilization,
+            # Training pipeline will set this to "dummy" and eval will load real weights using 'auto'
+            load_format=self.cfg["vllm_cfg"]["load_format"],
+            skip_tokenizer_init=self.cfg["vllm_cfg"]["skip_tokenizer_init"],
+            tensor_parallel_size=self.cfg["vllm_cfg"]["tensor_parallel_size"],
+            gpu_memory_utilization=self.cfg["vllm_cfg"]["gpu_memory_utilization"],
             enable_prefix_caching=True,
             dtype="auto",
             enforce_eager=True,
@@ -176,13 +182,10 @@ def __init__(
             trust_remote_code=True,
             worker_cls=UpdatableVllmInternalWorker,
             enable_sleep_mode=True,
+            disable_log_stats=True,
             **vllm_kwargs,
         )
 
-        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-        if self.tokenizer.pad_token is None:
-            self.tokenizer.pad_token = self.tokenizer.eos_token
-
     def llm(self):
         return self.llm
 
@@ -213,7 +216,7 @@ def generate(
                 f"input_ids and input_lengths must be present in the BatchedDataDict, got keys: {data.keys()}"
             )
         is_right_padded, error_msg = verify_right_padding(
-            data, pad_value=self.tokenizer.pad_token_id
+            data, pad_value=self.cfg["pad_token"]
         )
         if not is_right_padded:
             warnings.warn(
@@ -251,6 +254,7 @@ def generate(
             max_tokens=self.cfg["max_new_tokens"],
             logprobs=0,  # Return logprobs for the generated tokens
             stop=None,
+            stop_token_ids=self.cfg["stop_token_ids"],
         )
 
         # Generate outputs
@@ -276,7 +280,7 @@ def generate(
 
             # Create a new tensor with the right size and fill with padding token
             full_output = torch.full(
-                (total_length,), self.tokenizer.pad_token_id, dtype=input_ids.dtype
+                (total_length,), self.cfg["pad_token"], dtype=input_ids.dtype
             )
 
             # Copy original input (with padding) into the beginning
@@ -402,6 +406,17 @@ def __init__(
        """Initialize a vLLM policy with distributed workers."""
         # Store config
         self.cfg = config
+        # Ensure all required VllmConfig fields are present
+        missing_keys = [
+            key for key in VllmConfig.__annotations__ if key not in self.cfg
+        ]
+        assert not missing_keys, (
+            f"VLLM Configuration Error: Missing required keys in VllmConfig.\n"
+            f"Missing keys: {', '.join(missing_keys)}\n"
+            f"Provided keys: {', '.join(self.cfg.keys())}\n"
+            f"Please update your configuration to include all required VLLM parameters."
+        )
+
         self.tensor_parallel_size = self.cfg["vllm_cfg"]["tensor_parallel_size"]
 
         # Create worker builder for VllmGenerationWorker
diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py
index 9c89046af2..af8b5698e3 100644
--- a/tests/unit/models/generation/test_vllm_generation.py
+++ b/tests/unit/models/generation/test_vllm_generation.py
@@ -25,6 +25,7 @@
 
 # Define basic vLLM test config
 basic_vllm_test_config: VllmConfig = {
+    "backend": "vllm",
     "model_name": "meta-llama/Llama-3.2-1B",  # Small model for testing
     "dtype": "bfloat16",
     "max_new_tokens": 10,
@@ -39,6 +40,15 @@
 }
 
 
+def configure_vllm_with_tokenizer(vllm_config, tokenizer):
+    """Apply tokenizer-specific configurations to vLLM config."""
+    vllm_config["vllm_cfg"]["skip_tokenizer_init"] = True
+    vllm_config["vllm_cfg"]["load_format"] = "dummy"
+    vllm_config["pad_token"] = tokenizer.pad_token_id
+    vllm_config["stop_token_ids"] = [tokenizer.eos_token_id]
+    return vllm_config
+
+
 @pytest.fixture(scope="module")
 def check_vllm_available():
     """Skip tests if vLLM is not installed."""
@@ -74,9 +84,12 @@ def tokenizer():
 
 
 @pytest.fixture(scope="function")
-def policy(cluster, check_vllm_available):
+def policy(cluster, tokenizer, check_vllm_available):
     """Initialize the vLLM policy."""
-    policy = VllmGeneration(cluster, basic_vllm_test_config)
+    # Create separate configs for each policy
+    vllm_config = basic_vllm_test_config.copy()
+    vllm_config = configure_vllm_with_tokenizer(vllm_config, tokenizer)
+    policy = VllmGeneration(cluster, vllm_config)
     yield policy
 
     # Ensure policy is properly shutdown
@@ -121,6 +134,30 @@ def test_input_data(tokenizer):
     )
 
 
+def test_vllm_missing_required_config_key(cluster, check_vllm_available):
+    """Test that an assertion error is raised when a required config key is missing."""
+    # Create a config missing a required key by removing 'model_name'
+    incomplete_config = basic_vllm_test_config.copy()
+    del incomplete_config["model_name"]  # Remove a required key
+
+    # Also need to ensure skip_tokenizer_init and load_format are there
+    # since these are checked in VllmConfig.__annotations__
+    incomplete_config["skip_tokenizer_init"] = True
+    incomplete_config["load_format"] = "auto"
+
+    # Attempt to initialize VllmGeneration with incomplete config - should raise AssertionError
+    with pytest.raises(AssertionError) as excinfo:
+        VllmGeneration(cluster, incomplete_config)
+
+    # Verify the error message contains information about the missing key
+    error_message = str(excinfo.value)
+    assert "Missing required keys in VllmConfig" in error_message
+    assert "model_name" in error_message, (
+        "Error should mention the missing 'model_name' key"
+    )
+    print(f"Successfully caught missing config key with error: {error_message}")
+
+
 def test_vllm_policy_generation(policy, test_input_data, tokenizer):
     """Test vLLM policy generation capabilities."""
     # Test generation
@@ -171,6 +208,7 @@ def test_vllm_generation_with_hf_training(cluster, tokenizer):
 
     # Create separate configs for each policy
     vllm_config = basic_vllm_test_config.copy()
+    vllm_config = configure_vllm_with_tokenizer(vllm_config, tokenizer)
 
     # Create HF-specific config with required parameters
     hf_config = {
@@ -359,6 +397,7 @@ def test_vllm_policy_tensor_parallel(cluster, tokenizer):
     """Test vLLM policy with tensor parallelism > 1."""
     # Configure with tensor_parallel_size=2
     tp_config = basic_vllm_test_config.copy()
+    tp_config = configure_vllm_with_tokenizer(tp_config, tokenizer)
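+    # With skip_tokenizer_init=True vLLM never constructs a tokenizer, so the
+    # pad/stop token ids it would otherwise derive must be supplied up front;
+    # configure_vllm_with_tokenizer above fills exactly those config fields.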
     tp_config["tensor_parallel_size"] = 2
 
     # Ensure we specify the distributed executor backend
@@ -420,6 +459,7 @@ def test_vllm_policy_weight_update(cluster, tokenizer, tensor_parallel_size):
 
     # Create separate configs for each policy
     vllm_config = basic_vllm_test_config.copy()
+    vllm_config = configure_vllm_with_tokenizer(vllm_config, tokenizer)
     vllm_config["tensor_parallel_size"] = tensor_parallel_size
 
     # Add vllm_kwargs only if using tensor parallelism

From f8e51a5dafd4565d432088cd6b82a3e1d5f92912 Mon Sep 17 00:00:00 2001
From: Terry Kong
Date: Tue, 25 Mar 2025 22:02:15 -0700
Subject: [PATCH 03/27] feat: unit test metric tracking (#40)

Signed-off-by: Terry Kong
Signed-off-by: Yi-Fu Wu
---
 .github/workflows/_run_test.yml               |  18 +-
 .github/workflows/cicd-main.yml               |   7 +
 docs/testing.md                               |  65 +++++++
 nemo_reinforcer/utils/logger.py               |  96 ++++++++--
 tests/run_unit.sh                             |   2 +-
 tests/unit/conftest.py                        | 168 ++++++++++++++++++
 .../unit/models/policy/test_hf_ray_policy.py  |   4 +-
 tests/unit/utils/test_logger.py               |   8 +-
 8 files changed, 347 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/_run_test.yml b/.github/workflows/_run_test.yml
index 813719503c..3081d9b204 100644
--- a/.github/workflows/_run_test.yml
+++ b/.github/workflows/_run_test.yml
@@ -27,11 +27,16 @@ on:
         default: 10
       SCRIPT:
         type: string
-        description: Test script to execute
+        description: Test script to execute in container
         required: true
       AFTER_SCRIPT:
         type: string
-        description: Script to run after main test
+        description: Script to run after main test in container
         required: false
         default: ":"
+      FINAL_SCRIPT_EXTERNAL:
+        type: string
+        description: Script to run after SCRIPT and AFTER_SCRIPT, but outside container (useful for logging)
+        required: false
+        default: ":"
       IS_OPTIONAL:
@@ -163,6 +168,15 @@ jobs:
           )
           docker exec nemo_container_${{ github.run_id }} bash -eux -o pipefail -c "$cmd"
 
+      - name: final_script_external
+        if: always() && inputs.FINAL_SCRIPT_EXTERNAL != ':'
+        run: |
+          cmd=$(cat <<"RUN_TEST_EOF"
+          ${{ inputs.FINAL_SCRIPT_EXTERNAL }}
+          RUN_TEST_EOF
+          )
+          bash -eux -o pipefail -c "$cmd"
+
       - name: Container shutdown
         if: always()
         run: |
diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index 0e2a289c62..05dcefaf86 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -154,6 +154,13 @@ jobs:
       SCRIPT: |
         cd ${REINFORCER_REPO_DIR}
         uv run --extra test bash -x ./tests/run_unit.sh
+      FINAL_SCRIPT_EXTERNAL: |
+        cat <
diff --git a/nemo_reinforcer/utils/logger.py b/nemo_reinforcer/utils/logger.py
 None:
         self.run.config.update(params)
 
 
+class GpuMetricSnapshot(TypedDict):
+    step: int
+    metrics: Dict[str, Any]
+
+
 class RayGpuMonitorLogger:
     """Monitor GPU utilization across a Ray cluster and log metrics to a parent logger."""
 
@@ -163,7 +168,9 @@ def __init__(
         self.collection_interval = collection_interval
         self.flush_interval = flush_interval
         self.parent_logger = parent_logger
-        self.metrics_buffer = []  # Store metrics with timestamps
+        self.metrics_buffer: list[
+            GpuMetricSnapshot
+        ] = []  # Store metrics with timestamps
         self.last_flush_time = time.time()
         self.is_running = False
         self.collection_thread = None
@@ -228,7 +235,9 @@ def _collection_loop(self):
                 time.sleep(self.collection_interval)
 
             except Exception as e:
-                print(f"Error in GPU monitoring collection loop: {e}")
+                print(
+                    f"Error in GPU monitoring collection loop or stopped abruptly: {e}"
+                )
                 time.sleep(self.collection_interval)  # Continue despite errors
 
@@ -241,7 +250,6 @@ def _parse_gpu_metric(self, sample: Sample, node_idx: int) -> Dict[str, Any]:
         Returns:
             Dictionary with metric name and value
         """
-        # TODO: Consider plumbing {'GpuDeviceName': 'NVIDIA H100 80GB HBM3'}
         # Expected labels for GPU metrics
         expected_labels = ["GpuIndex"]
         for label in expected_labels:
@@ -266,12 +274,72 @@ def _parse_gpu_metric(self, sample: Sample, node_idx: int) -> Dict[str, Any]:
         metric_name = f"node.{node_idx}.gpu.{index}.{metric_name}"
         return {metric_name: value}
 
+    def _parse_gpu_sku(self, sample: Sample, node_idx: int) -> Dict[str, str]:
+        """Parse the GPU device name (SKU) from a metric sample.
+
+        Args:
+            sample: Prometheus metric sample
+            node_idx: Index of the node
+
+        Returns:
+            Dictionary mapping a metric name to the GPU device name
+        """
+        # TODO: Consider plumbing {'GpuDeviceName': 'NVIDIA H100 80GB HBM3'}
+        # Expected labels for GPU metrics
+        expected_labels = ["GpuIndex", "GpuDeviceName"]
+        for label in expected_labels:
+            if label not in sample.labels:
+                # This is probably a CPU node
+                return {}
+
+        metric_name = sample.name
+        # Only return SKU if the metric is one of these which publish these metrics
+        if (
+            metric_name != "ray_node_gpus_utilization"
+            and metric_name != "ray_node_gram_used"
+        ):
+            # Skip unexpected metrics
+            return {}
+
+        labels = sample.labels
+        index = labels["GpuIndex"]
+        value = labels["GpuDeviceName"]
+
+        metric_name = f"node.{node_idx}.gpu.{index}.type"
+        return {metric_name: value}
+
+    def _collect_gpu_sku(self) -> Dict[str, str]:
+        """Collect GPU SKU from all Ray nodes.
+
+        Note: This is an internal API and users are not expected to call this.
+
+        Returns:
+            Dictionary of SKU types on all Ray nodes
+        """
+        # TODO: We can re-use the same path for metrics because even though both utilization and memory metrics duplicate
+        # the GPU metadata information; since the metadata is the same for each node, we can overwrite it and expect them to
+        # be the same
+        return self._collect(sku=True)
+
     def _collect_metrics(self) -> Dict[str, Any]:
         """Collect GPU metrics from all Ray nodes.
 
         Returns:
             Dictionary of collected metrics
         """
+        return self._collect(metrics=True)
+
+    def _collect(self, metrics: bool = False, sku: bool = False) -> Dict[str, Any]:
+        """Collect GPU metrics or SKUs from all Ray nodes.
+
+        Returns:
+            Dictionary of collected metrics
+        """
+        assert metrics ^ sku, (
+            f"Must collect either metrics or sku, not both: {metrics=}, {sku=}"
+        )
+        parser_fn = self._parse_gpu_metric if metrics else self._parse_gpu_sku
+
         if not ray.is_initialized():
             print("Ray is not initialized. Cannot collect GPU metrics.")
             return {}
@@ -295,7 +363,9 @@ def _collect_metrics(self) -> Dict[str, Any]:
         # Process each node's metrics
         collected_metrics = {}
         for node_idx, metric_address in enumerate(unique_metric_addresses):
-            gpu_metrics = self._fetch_and_parse_metrics(node_idx, metric_address)
+            gpu_metrics = self._fetch_and_parse_metrics(
+                node_idx, metric_address, parser_fn
+            )
             collected_metrics.update(gpu_metrics)
 
         return collected_metrics
@@ -304,7 +374,7 @@ def _collect_metrics(self) -> Dict[str, Any]:
             print(f"Error collecting GPU metrics: {e}")
             return {}
 
-    def _fetch_and_parse_metrics(self, node_idx, metric_address):
+    def _fetch_and_parse_metrics(self, node_idx, metric_address, parser_fn):
         """Fetch metrics from a node and parse GPU metrics.
 
         Args:
@@ -335,7 +405,7 @@ def _fetch_and_parse_metrics(self, node_idx, metric_address):
                     continue
 
                 for sample in family.samples:
-                    metrics = self._parse_gpu_metric(sample, node_idx)
+                    metrics = parser_fn(sample, node_idx)
                     gpu_metrics.update(metrics)
 
             return gpu_metrics
@@ -346,18 +416,16 @@ def _fetch_and_parse_metrics(self, node_idx, metric_address):
 
     def flush(self):
         """Flush collected metrics to the parent logger."""
-        if not self.parent_logger:
-            return
-
         with self.lock:
             if not self.metrics_buffer:
                 return
 
-            # Log each set of metrics with its original step
-            for entry in self.metrics_buffer:
-                step = entry["step"]
-                metrics = entry["metrics"]
-                self.parent_logger.log_metrics(metrics, step, prefix="ray")
+            if self.parent_logger:
+                # Log each set of metrics with its original step
+                for entry in self.metrics_buffer:
+                    step = entry["step"]
+                    metrics = entry["metrics"]
+                    self.parent_logger.log_metrics(metrics, step, prefix="ray")
 
             # Clear buffer after logging
             self.metrics_buffer = []
diff --git a/tests/run_unit.sh b/tests/run_unit.sh
index e826a18a60..f51ff49ff6 100755
--- a/tests/run_unit.sh
+++ b/tests/run_unit.sh
@@ -33,7 +33,7 @@ export RAY_DEDUP_LOGS=0
 
 # Run unit tests
 echo "Running unit tests..."
-if ! pytest unit/ -s -rA "$@"; then
+if ! pytest unit/ --cov=nemo_reinforcer --cov-report=term --cov-report=json -s -rA "$@"; then
     echo "[ERROR]: Unit tests failed."
     exit 1
 fi
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index 67de3a36af..b13d6588a4 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -11,7 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from io import StringIO
+import time
 import pytest
+from nemo_reinforcer.utils.logger import GPUMonitoringConfig
+from tests import unit
 import torch
 import torch.distributed as dist
 import torch.multiprocessing as mp
@@ -28,7 +32,153 @@
 import random
 from typing import Callable
 import ray
+import json
 from nemo_reinforcer.distributed.virtual_cluster import init_ray
+from typing import TypedDict
+from datetime import datetime
+
+dir_path = os.path.dirname(os.path.abspath(__file__))
+
+UNIT_RESULTS_FILE = os.path.join(dir_path, "unit_results.json")
+UNIT_RESULTS_FILE_DATED = os.path.join(
+    dir_path, f"unit_results/{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+)
+
+
+class UnitTestData(TypedDict):
+    exit_status: int | str
+    git_commit: str
+    start_time: str
+    metrics: dict
+    gpu_types: list[str]
+    coverage: dict
+
+
+def pytest_sessionstart(session):
+    # Delete the unit results file at the start of a new test session
+    if os.path.exists(UNIT_RESULTS_FILE):
+        try:
+            os.remove(UNIT_RESULTS_FILE)
+            print(f"Deleted existing results file: {UNIT_RESULTS_FILE}")
+        except Exception as e:
+            print(f"Warning: Failed to delete results file: {e}")
+
+    # Get the git commit hash
+    try:
+        import subprocess
+
+        result = subprocess.run(
+            ["git", "-C", dir_path, "rev-parse", "HEAD"],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        git_commit = result.stdout.strip()
+    except Exception as e:
+        git_commit = f"Error getting git commit: {str(e)}"
+
+    session.config._unit_test_data = UnitTestData(
+        exit_status="was not set",
+        git_commit=git_commit,
+        start_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        metrics={},
+        gpu_types=[],
+        coverage={},
+    )
+
+
+@pytest.fixture(scope="session", autouse=True)
+def session_data(request, init_ray_cluster):
+    """Session-level fixture to store and save metrics data.
+
+    This fixture tracks both metrics from tests and metadata about the test environment.
+    The metrics are stored in the 'metrics' dictionary.
+
+    It's set to autouse so that we track metadata and coverage even if no selected test
+    explicitly tracks metrics.
+    """
+    # Pass init_ray_cluster so that we can access ray metadata
+
+    ############################################################
+    # 1. Gather all the unit test data                         #
+    ############################################################
+    unit_test_data: UnitTestData = request.config._unit_test_data
+    yield unit_test_data
+
+    ############################################################
+    # 2. Gather the ray metadata                               #
+    ############################################################
+    from nemo_reinforcer.utils.logger import RayGpuMonitorLogger
+
+    logger = RayGpuMonitorLogger(
+        collection_interval=float("inf"),
+        flush_interval=float("inf"),
+        parent_logger=None,
+    )
+    unit_test_data["gpu_types"] = list(set(logger._collect_gpu_sku().values()))
+
+    ############################################################
+    # 3. Gather the coverage data                              #
+    ############################################################
+    # We directly access the coverage controller from the plugin manager
+    # so we can access the coverage total before the pytest session finishes.
+    if request.config.pluginmanager.hasplugin("_cov"):
+        plugin = request.config.pluginmanager.getplugin("_cov")
+        if plugin.cov_controller:
+            cov_controller = plugin.cov_controller
+            # We currently don't use the cov_report since we can always access the coverage.json later, but
+            # in the future if we want to report the coverage more granularly as part of the session finish,
+            # we can access it here.
+            cov_report = StringIO()
+            cov_total = cov_controller.summary(cov_report)
+            unit_test_data["coverage"] = cov_total
+
+
+@pytest.fixture
+def tracker(request, session_data, ray_gpu_monitor):
+    """Test-level fixture that automatically captures test function info."""
+    # Get fully qualified test name (module::test_function)
+    module_name = request.module.__name__
+    test_name = request.function.__name__
+    qualified_name = f"{module_name}::{test_name}"
+
+    # Initialize an empty dict for this test if it doesn't exist
+    if qualified_name not in session_data:
+        session_data["metrics"][qualified_name] = {}
+
+    class Tracker:
+        def track(self, metric_name: str, value):
+            """Tracking an arbitrary metric."""
+            session_data["metrics"][qualified_name][metric_name] = value
+
+        def get_max_mem(self):
+            metrics = ray_gpu_monitor._collect_metrics()
+            max_mem = 0
+            for m_name, m_value in metrics.items():
+                if m_name.endswith(".memory"):
+                    max_mem = max(max_mem, m_value)
+            return max_mem
+
+        def log_max_mem(self, metric_name: str):
+            session_data["metrics"][qualified_name][metric_name] = self.get_max_mem()
+
+    start_time = time.time()
+    yield Tracker()
+    end_time = time.time()
+    # Prefix with `_` to indicate it's automatically collected
+    session_data["metrics"][qualified_name]["_elapsed"] = end_time - start_time
+
+
+def pytest_sessionfinish(session, exitstatus):
+    data = session.config._unit_test_data
+    data["exit_status"] = exitstatus
+    print(f"\nSaving unit test data to {UNIT_RESULTS_FILE}")
+    print(f"and saving to {UNIT_RESULTS_FILE_DATED}")
+    with open(UNIT_RESULTS_FILE, "w") as f:
+        json.dump(data, f, indent=2)
+    os.makedirs(os.path.dirname(UNIT_RESULTS_FILE_DATED), exist_ok=True)
+    with open(UNIT_RESULTS_FILE_DATED, "w") as f:
+        json.dump(data, f, indent=2)
 
 
 @pytest.fixture(scope="session", autouse=True)
@@ -42,6 +192,24 @@ def init_ray_cluster():
     ray.shutdown()
 
 
+@pytest.fixture(scope="session", autouse=True)
+def ray_gpu_monitor(init_ray_cluster):
+    """Initialize Ray for the test module and clean up afterward.
+
+    This fixture doesn't need to be called directly.
+    """
+    from nemo_reinforcer.utils.logger import RayGpuMonitorLogger
+
+    gpu_monitor = RayGpuMonitorLogger(
+        collection_interval=1,
+        flush_interval=float("inf"),  # Disabling flushing since we will do it manually
+        parent_logger=None,
+    )
+    gpu_monitor.start()
+    yield gpu_monitor
+    gpu_monitor.stop()
+
+
 def _setup_distributed(rank, world_size, port, backend="nccl"):
     """Initialize the distributed environment for a test (internal use only)"""
     os.environ["MASTER_ADDR"] = "localhost"
diff --git a/tests/unit/models/policy/test_hf_ray_policy.py b/tests/unit/models/policy/test_hf_ray_policy.py
index ae80b3fd1e..29d8cbcbee 100644
--- a/tests/unit/models/policy/test_hf_ray_policy.py
+++ b/tests/unit/models/policy/test_hf_ray_policy.py
@@ -364,7 +364,7 @@ def generation_setup():
 
 
 @pytest.mark.timeout(180)
-def test_hf_policy_generation(generation_setup):
+def test_hf_policy_generation(generation_setup, tracker):
     policy, cluster, data, tokenizer, prompts, expected_generations = generation_setup
 
     # Verify resources were created properly
@@ -415,6 +415,7 @@ def test_hf_policy_generation(generation_setup):
         torch.exp(torch.abs(results["logprobs"] - fprop_results["logprobs"]))
     )
     print(f"avg prob mult error: {avg_prob_mult_error}")
+    tracker.track("avg_prob_mult_error", float(avg_prob_mult_error))
     assert avg_prob_mult_error <= 1.025
 
     # get logprobs for the expected generations
@@ -439,6 +440,7 @@ def test_hf_policy_generation(generation_setup):
 
     expected_logprobs = policy.get_logprobs(expected_data)["logprobs"]
     mean_lps = torch.mean(expected_logprobs * expected_tokenized["attention_mask"])
+    tracker.track("mean_lps", float(mean_lps))
     assert mean_lps > -1.7, "Expected logprobs should be greater than -1.7"
     assert mean_lps < -1.4, "Expected logprobs should be less than -1.4"
 
diff --git a/tests/unit/utils/test_logger.py b/tests/unit/utils/test_logger.py
index a8d982266b..dd1c24fb27 100644
--- a/tests/unit/utils/test_logger.py
+++ b/tests/unit/utils/test_logger.py
@@ -417,7 +417,9 @@ def test_fetch_and_parse_metrics(self, mock_get, mock_ray):
 
         # Call the method
         result = monitor._fetch_and_parse_metrics(
-            node_idx=2, metric_address="test_ip:test_port"
+            node_idx=2,
+            metric_address="test_ip:test_port",
+            parser_fn=monitor._parse_gpu_metric,
         )
 
         # Verify request was made correctly
@@ -460,8 +462,8 @@ def test_collect_metrics(self, mock_ray):
 
         # Verify _fetch_and_parse_metrics was called for each node
         assert mock_fetch.call_count == 2
-        mock_fetch.assert_any_call(0, "10.0.0.1:8080")
-        mock_fetch.assert_any_call(1, "10.0.0.2:8080")
+        mock_fetch.assert_any_call(0, "10.0.0.1:8080", monitor._parse_gpu_metric)
+        mock_fetch.assert_any_call(1, "10.0.0.2:8080", monitor._parse_gpu_metric)
 
         # Verify the result combines metrics from all nodes
         assert result == {

From 778f5b0927fcc9b58b82b2381d7d6701e6a95447 Mon Sep 17 00:00:00 2001
From: Terry Kong
Date: Wed, 26 Mar 2025 10:29:07 -0700
Subject: [PATCH 04/27] fix: unit test error when coverage wasn't specified
 (#88)

Signed-off-by: Terry Kong
Signed-off-by: Yi-Fu Wu
---
 tests/unit/conftest.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index b13d6588a4..1dbbd6a169 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -51,7 +51,7 @@ class UnitTestData(TypedDict):
     start_time: str
     metrics: dict
     gpu_types: list[str]
-    coverage: dict
+    coverage: str
 
 
 def pytest_sessionstart(session):
@@ -83,7 +83,7 @@ def pytest_sessionstart(session):
         start_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
         metrics={},
         gpu_types=[],
-        coverage={},
+        coverage="[n/a] run with --cov=nemo_reinforcer",
     )
 
 
@@ -122,10 +122,16 @@ def session_data(request, init_ray_cluster):
     ############################################################
     # We directly access the coverage controller from the plugin manager
     # so we can access the coverage total before the pytest session finishes.
+    cov_controller = None
     if request.config.pluginmanager.hasplugin("_cov"):
         plugin = request.config.pluginmanager.getplugin("_cov")
         if plugin.cov_controller:
             cov_controller = plugin.cov_controller
+
+    if not cov_controller:
+        # Means the user didn't run with --cov=...
+        return
+
     # We currently don't use the cov_report since we can always access the coverage.json later, but
     # in the future if we want to report the coverage more granularly as part of the session finish,
     # we can access it here.

From c46581ea74e3a87f985848abe5c8bcf7468d0437 Mon Sep 17 00:00:00 2001
From: Terry Kong
Date: Wed, 26 Mar 2025 11:54:25 -0700
Subject: [PATCH 05/27] ci: temporarily disable CI on main since PRs must be
 up to date before merge (#91)

Signed-off-by: Terry Kong
Signed-off-by: Yi-Fu Wu
---
 .github/workflows/cicd-main.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index 05dcefaf86..2d2255e7f2 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -28,9 +28,10 @@ on:
         default: all
         type: string
         description: Comma-separated list of tests to run. Use "all" to run the full test suite.
-  push:
-    branches:
-      - 'main'
+  # TODO: Due to limited compute, disabling pushes to main. This is okay to do since we force PRs to be up to date and the CI tests on pull/$PR_NUM/merge
+  #push:
+  #  branches:
+  #    - 'main'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}

From dc9d857cb616125b1c2bd20361ecd9628a1915c1 Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu
Date: Wed, 26 Mar 2025 18:29:58 -0700
Subject: [PATCH 06/27] Cleanup + expandable_segments

Signed-off-by: Yi-Fu Wu
---
 nemo_reinforcer/distributed/worker_groups.py |   4 +-
 nemo_reinforcer/models/policy/hf_policy.py   | 177 ++-----------------
 2 files changed, 12 insertions(+), 169 deletions(-)

diff --git a/nemo_reinforcer/distributed/worker_groups.py b/nemo_reinforcer/distributed/worker_groups.py
index 93d4808b75..2497065b0f 100644
--- a/nemo_reinforcer/distributed/worker_groups.py
+++ b/nemo_reinforcer/distributed/worker_groups.py
@@ -125,7 +125,6 @@ def __call__(
             num_gpus=num_gpus,
             bundle_indices=bundle_indices,
         )
-        print("HERE worker_class", worker_class, "env_vars", env_vars)
 
         # Apply resource configuration
         if resources and "num_gpus" in resources:
@@ -135,7 +134,8 @@ def __call__(
         if env_vars:
             if "runtime_env" not in options:
                 options["runtime_env"] = {}
-            options["runtime_env"]["env_vars"] = env_vars
+            for k, v in env_vars.items():
+                options["runtime_env"]["env_vars"][k] = v
 
         # Apply initialization parameters
         if init_kwargs:
diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py
index 6478eda6e2..08eb9061ce 100644
--- a/nemo_reinforcer/models/policy/hf_policy.py
+++ b/nemo_reinforcer/models/policy/hf_policy.py
@@ -21,8 +21,6 @@
 import ray
 import torch
 from torch.distributed.device_mesh import init_device_mesh
-from torch.distributed.fsdp.api import CPUOffload
-from torch.distributed.fsdp._runtime_utils import _lazy_init
 from torch.distributed.fsdp import (
     FullyShardedDataParallel,
     FullStateDictConfig,
@@ -48,164 +46,36 @@
 from nemo_reinforcer.distributed.virtual_cluster import (
     PY_EXECUTABLES,
 )
-# def move_to_cpu(model):
-#     return model.to("cpu")
 
-# def move_to_cpu(model):
-#     for param in model.parameters():
-#         param.data = param.data.to("cpu")
-#     for buffer in model.buffers():
-#         buffer.data = buffer.data.to("cpu")
-#     if hasattr(model, "_fsdp_wrapped_module"):
-#         model._fsdp_wrapped_module.to("cpu")
-
-#     return model
-
-# def move_to_gpu(model):
-#     return model.to("cuda")
 
 def move_to_cpu(model):
-    # offload2
-    # if not isinstance(model, FullyShardedDataParallel):
-    #     return model.to("cpu")
-
     for _, param in model.named_parameters():
         param.data = param.data.to("cpu", non_blocking=True)
         if hasattr(param, "_local_shard"):
-            # param._local_shard = param._local_shard.to("cpu", non_blocking=True)
             param._local_shard = param.data
-        # param.data = param.data.to("cpu", non_blocking=True)
         if param.grad is not None:
             param.grad = param.grad.to("cpu", non_blocking=True)
 
-    # for buffer in model.buffers():
-    #     buffer.data = buffer.data.to("cpu")
-    # if torch.distributed.get_rank() == 0:
-    #     import pdb; pdb.set_trace()
     if hasattr(model, "_fsdp_wrapped_module"):
         move_to_cpu(model._fsdp_wrapped_module)
-        # model._fsdp_wrapped_module.to("cpu")
 
     return model
 
+
 def move_to_gpu(model):
-    # offload2
-    # if not isinstance(model, FullyShardedDataParallel):
-    #     return model.to("cuda")
-
     for _, param in model.named_parameters():
         param.data = param.data.to("cuda", non_blocking=True)
         if hasattr(param, "_local_shard"):
-            # param._local_shard = param._local_shard.to("cuda", non_blocking=True)
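             # `_local_shard` is a private FSDP attribute that normally aliases
             # `flat_param.data`; re-pointing it at the just-moved tensor keeps
             # FSDP's bookkeeping consistent after a manual device move (this
             # leans on torch internals and may change between versions).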
             param._local_shard = param.data
-        # param.data = param.data.to("cuda", non_blocking=True)
         if param.grad is not None:
             param.grad = param.grad.to("cuda", non_blocking=True)
 
-    # for buffer in model.buffers():
-    #     buffer.data = buffer.data.to("cuda")
     if hasattr(model, "_fsdp_wrapped_module"):
         move_to_gpu(model._fsdp_wrapped_module)
 
     return model
 
-
-# def move_to_cpu(model):
-#     if not isinstance(model, FullyShardedDataParallel):
-#         model = model.to("cpu")
-#         return model
-#     # lazy init FSDP model
-#     _lazy_init(model, model)
-#     # assert model._is_root, f"Only support root model offloading to CPU"
-#     # # if torch.distributed.get_rank() == 0:
-#     # #     import ipdb; ipdb.set_trace()
-#     for handle in model._all_handles:
-#         if handle._offload_params:
-#             continue
-#         flat_param = handle.flat_param
-#         # assert flat_param.data.data_ptr() == flat_param._local_shard.data_ptr() and \
-#         #     id(flat_param.data) != id(flat_param._local_shard) and \
-#         #     flat_param.data.size() == flat_param._local_shard.size()
-#         handle.flat_param_to(torch.device("cpu"), non_blocking=True)
-#         # the following still keeps id(._local_shard) != id(.data)
-#         flat_param._local_shard = flat_param.data
-#         # assert id(flat_param._local_shard) != id(flat_param.data)
-#     return model
-
-# def move_to_gpu(model):
-#     if not isinstance(model, FullyShardedDataParallel):
-#         model = model.to("cuda")
-#         return model
-#     # lazy init FSDP model
-#     _lazy_init(model, model)
-#     # assert model._is_root, f"Only support root model loading to GPU"
-#     device_id = torch.cuda.current_device()
-#     for handle in model._all_handles:
-#         if handle._offload_params:
-#             continue
-#         flat_param = handle.flat_param
-#         handle.flat_param_to(torch.device(f"cuda:{device_id}"), non_blocking=True)
-#         # the following still keeps id(._local_shard) != id(.data)
-#         flat_param._local_shard = flat_param.data
-#     return model
-
-
-# def move_to_cpu(model):
-#     if not isinstance(model, FullyShardedDataParallel):
-#         model = model.to("cpu")
-#         return model
-#     # lazy init FSDP model
-#     _lazy_init(model, model)
-#     assert model._is_root, f"Only support root model offloading to CPU"
-#     # if torch.distributed.get_rank() == 0:
-#     #     import ipdb; ipdb.set_trace()
-#     for handle in model._all_handles:
-#         if handle._offload_params:
-#             continue
-#         flat_param = handle.flat_param
-#         assert flat_param.data.data_ptr() == flat_param._local_shard.data_ptr() and \
-#             id(flat_param.data) != id(flat_param._local_shard) and \
-#             flat_param.data.size() == flat_param._local_shard.size()
-#         handle.flat_param_to(torch.device("cpu"), non_blocking=True)
-#         # the following still keeps id(._local_shard) != id(.data)
-#         flat_param._local_shard = flat_param.data
-#         assert id(flat_param._local_shard) != id(flat_param.data)
-
-#     for param in model.parameters():
-#         param.data = param.data.to("cpu")
-#     for buffer in model.buffers():
-#         buffer.data = buffer.data.to("cpu")
-#     if hasattr(model, "_fsdp_wrapped_module"):
-#         move_to_cpu(model._fsdp_wrapped_module)
-#         # model._fsdp_wrapped_module.to("cpu")
-
-#     return model
-
-# def move_to_gpu(model):
-#     if not isinstance(model, FullyShardedDataParallel):
-#         model = model.to("cuda")
-#         return model
-#     # lazy init FSDP model
-#     _lazy_init(model, model)
-#     assert model._is_root, f"Only support root model loading to GPU"
-#     device_id = torch.cuda.current_device()
-#     for handle in model._all_handles:
-#         if handle._offload_params:
-#             continue
-#         flat_param = handle.flat_param
-#         handle.flat_param_to(torch.device(f"cuda:{device_id}"), non_blocking=True)
-#         # the following still keeps id(._local_shard) != id(.data)
-#         flat_param._local_shard = flat_param.data
-
-#     for param in model.parameters():
-#         param.data = param.data.to("cuda")
-#     for buffer in model.buffers():
-#         buffer.data = buffer.data.to("cuda")
-#     if hasattr(model, "_fsdp_wrapped_module"):
-#         move_to_gpu(model._fsdp_wrapped_module)
-
-#     return model
 
 @ray.remote
 class HfPolicyWorker:
     DEFAULT_PY_EXECUTABLE = PY_EXECUTABLES.DEFAULT_VENV
@@ -262,15 +132,6 @@ def __init__(
         # (Initialize device mesh, shard submodules, then shard entire model)
         # ------------------------------------------------
 
-        @staticmethod
-        def configure_worker(
-            num_gpus: int | float, bundle_indices: Optional[list] = None
-        ) -> tuple[dict, dict, dict]:
-            env_vars = {
-                "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"
-            }
-            return None, env_vars, None
-
         def do_fsdp(model):
             # Create a device mesh with 'world_size' GPUs in a 1D arrangement.
             mesh = init_device_mesh("cuda", (world_size,))
@@ -280,30 +141,21 @@ def do_fsdp(model):
                 buffer_dtype=torch.float32,
             )
 
-            # return FullyShardedDataParallel(
-            #     model,
-            #     device_mesh=mesh,
-            #     auto_wrap_policy=size_based_auto_wrap_policy,
-            # )
             return FullyShardedDataParallel(
                 model,
                 device_mesh=mesh,
                 auto_wrap_policy=size_based_auto_wrap_policy,
                 mixed_precision=mp_policy,
-                # cpu_offload=CPUOffload(offload_params=True),
             )
 
-        # self.model.to("cuda")
-        self.model = move_to_gpu(self.model)
+        self.model.to("cuda")
         self.model = do_fsdp(self.model)
         self.model = move_to_cpu(self.model)
-        # self.reference_model.to("cuda")
-
+        self.reference_model.to("cuda")
         self.reference_model = do_fsdp(self.reference_model)
         self.reference_model = move_to_cpu(self.reference_model)
-
-        # self.model.to("cuda")
+
         self.model = move_to_gpu(self.model)
         self._held_reference_model_params = None
         # register_fsdp_forward_method(self.model, "generate")
         if init_optimizer:
@@ -355,6 +207,13 @@ def do_fsdp(model):
                 "No weights path provided. Starting from scratch (default policy init)"
             )
 
+    @staticmethod
+    def configure_worker(
+        num_gpus: int | float, bundle_indices: Optional[list] = None
+    ) -> tuple[dict, dict, dict]:
+        env_vars = {"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"}
+        return None, env_vars, None
+
     def is_alive(self):
         return True
 
@@ -601,7 +460,6 @@ def use_reference_model(self):
             original_reference_model = self.reference_model
 
             self.model = move_to_cpu(self.model)
-            # self.reference_model = self.reference_model.to("cuda")
             self.reference_model = move_to_gpu(self.reference_model)
 
             # Swap the references
@@ -616,7 +474,6 @@ def use_reference_model(self):
         finally:
             # Restore original references and device placement
             self.reference_model = move_to_cpu(original_reference_model)
-            # self.model = original_model.to("cuda")
             self.model = move_to_gpu(original_model)
             gc.collect()
             torch.cuda.empty_cache()
@@ -890,14 +747,12 @@ def get_weight_ipc_handles(self, offload_model=True):
         return {self.device_uuid: data}
 
     def prepare_for_lp_inference(self):
-        # self.model.to("cuda")
         self.model = move_to_gpu(self.model)
         self.model.eval()
         self.offload_before_refit()
 
     def prepare_for_training(self, *args, **kwargs):
         # onload models and optimizer state to cuda
-        # self.model.to("cuda")
         self.model = move_to_gpu(self.model)
         self.model.train()
 
@@ -953,18 +808,6 @@ def offload_after_refit(self):
         print(
             f"GPU Memory after refit complete: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved"
         )
-
-    def move_to_cpu_old1(self, model):
-        for param in model.parameters():
-            param.data = param.data.to("cpu")
-
-        for buffer in model.buffers():
-            buffer.data = buffer.data.to("cpu")
-
-        if hasattr(model, "_fsdp_wrapped_module"):
-            model._fsdp_wrapped_module.to("cpu")
-
-        return model
 
     def save_checkpoint(
         self,

From fd0f613109e061c9a881166d7c306ab065805a15 Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu
Date: Wed, 26 Mar 2025 18:34:24 -0700
Subject: [PATCH 07/27] More cleanup

Signed-off-by: Yi-Fu Wu
---
 examples/configs/grpo_math_1B.yaml         | 15 +++++++--------
 examples/run_grpo_math.py                  |  2 +-
 nemo_reinforcer/models/policy/hf_policy.py |  4 ++--
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml
index a8149156e9..72aad000ce 100644
--- a/examples/configs/grpo_math_1B.yaml
+++ b/examples/configs/grpo_math_1B.yaml
@@ -5,7 +5,7 @@ grpo:
   max_num_steps: 1000000
   normalize_rewards: true
   use_leave_one_out_baseline: true
-  val_period: -1
+  val_period: 10
   val_at_start: false
   max_val_samples: 256
   val_batch_size: 256
@@ -15,7 +15,7 @@ loss_fn:
   ratio_eps: 0.2
 
 checkpointing:
-  enabled: false
+  enabled: true
   checkpoint_dir: "results/grpo"
   metric_name: "val_reward"
   higher_is_better: true
@@ -29,8 +29,7 @@ policy:
   generation_batch_size: 32
   logprob_batch_size: 4
   max_total_sequence_length: 512
-  # precision: "bfloat16"
-  precision: "float32"
+  precision: "bfloat16"
 
   optimizer:
     name: "torch.optim.AdamW"
@@ -76,17 +75,17 @@ env:
 logger:
   log_dir: "logs" # Base directory for all logs
   num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal
-  wandb_enabled: true
+  wandb_enabled: false
   tensorboard_enabled: false
   monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard
   wandb:
-    project: "grpo-dev-yifu"
-    name: "grpo-dev-offload2"
+    project: "grpo-dev"
+    name: "grpo-dev-logger"
   tensorboard: {}
   gpu_monitoring:
     collection_interval: 10 # How often to collect GPU usage metrics (in seconds)
     flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds)
 
 cluster:
-  gpus_per_node: 2
+  gpus_per_node: 1
   num_nodes: 1
 
diff --git a/examples/run_grpo_math.py b/examples/run_grpo_math.py
index b0085535ef..7e2f3e693d 100644
--- a/examples/run_grpo_math.py
+++ b/examples/run_grpo_math.py
@@ -23,7 +23,7 @@
 from transformers import AutoTokenizer
 from collections import defaultdict
 
-from nemo_reinforcer.algorithms.grpo import MasterConfig, grpo_train, setup, refit_policy_generation
+from nemo_reinforcer.algorithms.grpo import MasterConfig, grpo_train, setup
 from nemo_reinforcer.distributed.virtual_cluster import init_ray
 from nemo_reinforcer.utils.config import load_config
 from nemo_reinforcer.utils.logger import get_next_experiment_dir
diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py
index 08eb9061ce..9813187914 100644
--- a/nemo_reinforcer/models/policy/hf_policy.py
+++ b/nemo_reinforcer/models/policy/hf_policy.py
@@ -49,7 +49,7 @@
 
 
 def move_to_cpu(model):
-    for _, param in model.named_parameters():
+    for param in model.parameters():
         param.data = param.data.to("cpu", non_blocking=True)
         if hasattr(param, "_local_shard"):
             param._local_shard = param.data
@@ -63,7 +63,7 @@ def move_to_cpu(model):
 
 
 def move_to_gpu(model):
-    for _, param in model.named_parameters():
+    for param in model.parameters():
         param.data = param.data.to("cuda", non_blocking=True)
         if hasattr(param, "_local_shard"):
             param._local_shard = param.data

From d9b7fd458a87885d550b6f16e15c807419a8de0a Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu
Date: Mon, 31 Mar 2025 07:22:34 -0700
Subject: [PATCH 08/27] cpu offload

Signed-off-by: Yi-Fu Wu
---
 nemo_reinforcer/models/policy/hf_policy.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py
index 9813187914..a5fb7be767 100644
--- a/nemo_reinforcer/models/policy/hf_policy.py
+++ b/nemo_reinforcer/models/policy/hf_policy.py
@@ -22,6 +22,7 @@
 import torch
 from torch.distributed.device_mesh import init_device_mesh
 from torch.distributed.fsdp import (
+    CPUOffload,
     FullyShardedDataParallel,
     FullStateDictConfig,
     MixedPrecision,
@@ -49,6 +50,7 @@
 
 
 def move_to_cpu(model):
+    return model
     for param in model.parameters():
         param.data = param.data.to("cpu", non_blocking=True)
         if hasattr(param, "_local_shard"):
@@ -63,6 +65,7 @@ def move_to_cpu(model):
 
 
 def move_to_gpu(model):
+    return model
     for param in model.parameters():
         param.data = param.data.to("cuda", non_blocking=True)
         if hasattr(param, "_local_shard"):
@@ -146,6 +149,7 @@ def do_fsdp(model):
             device_mesh=mesh,
             auto_wrap_policy=size_based_auto_wrap_policy,
             mixed_precision=mp_policy,
+            cpu_offload=CPUOffload(offload_params=True),
         )
 
         self.model.to("cuda")
@@ -761,7 +765,8 @@ def prepare_for_training(self, *args, **kwargs):
         for state in self.optimizer.state.values():
             for k, v in state.items():
                 if torch.is_tensor(v) and not v.is_cuda:
-                    state[k] = v.to("cuda")
+                    ...
+                    # state[k] = v.to("cuda")
 
         torch.cuda.empty_cache()
 
@@ -773,10 +778,11 @@ def offload_before_refit(self):
         for state in self.optimizer.state.values():
             for k, v in state.items():
                 if torch.is_tensor(v):
-                    state[k] = v.to("cpu")
+                    ...
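+                    # Intentionally a no-op while FSDP CPUOffload is enabled:
+                    # the optimizer state is expected to already live on CPU,
+                    # so the old explicit copy is kept below only for reference.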
+ # state[k] = v.to("cpu") - for buffer in self.model.buffers(): - buffer.data = buffer.data.to("cpu") + # for buffer in self.model.buffers(): + # buffer.data = buffer.data.to("cpu") gc.collect() torch.cuda.empty_cache() From d24a9c340a20fbdedc08355bc243a939f22545b7 Mon Sep 17 00:00:00 2001 From: Yi-Fu Wu Date: Mon, 31 Mar 2025 18:05:36 -0700 Subject: [PATCH 09/27] Make everything configurable Signed-off-by: Yi-Fu Wu --- examples/configs/grpo_math_1B.yaml | 2 + nemo_reinforcer/distributed/worker_groups.py | 6 +- nemo_reinforcer/models/policy/__init__.py | 2 + nemo_reinforcer/models/policy/hf_policy.py | 133 ++++++++++--------- 4 files changed, 78 insertions(+), 65 deletions(-) diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml index 72aad000ce..f15031c536 100644 --- a/examples/configs/grpo_math_1B.yaml +++ b/examples/configs/grpo_math_1B.yaml @@ -30,6 +30,8 @@ policy: logprob_batch_size: 4 max_total_sequence_length: 512 precision: "bfloat16" + expandable_segments_enabled: false + fsdp_offload_enabled: false optimizer: name: "torch.optim.AdamW" diff --git a/nemo_reinforcer/distributed/worker_groups.py b/nemo_reinforcer/distributed/worker_groups.py index 2497065b0f..94af81b555 100644 --- a/nemo_reinforcer/distributed/worker_groups.py +++ b/nemo_reinforcer/distributed/worker_groups.py @@ -179,6 +179,7 @@ def __init__( workers_per_node: Optional[Union[int, List[int]]] = None, name_prefix: str = "", bundle_indices_list: Optional[List[tuple]] = None, + additional_env_vars: Optional[Dict[str, str]] = None, ): """Initialize a group of distributed Ray workers. @@ -191,13 +192,14 @@ def __init__( bundle_indices_list: Explicit list of (node_idx, [local_bundle_indices]) tuples. Each tuple defines a tied group of workers placed on the same node. If provided, workers_per_node is ignored. 
+ additional_env_vars: Additional environment variables to pass to the workers """ self._workers = [] self._worker_metadata = [] self.cluster = cluster self.name_prefix = name_prefix self.tied_workers_groups = [] - + self.additional_env_vars = additional_env_vars # Maps worker indices to their corresponding tied group index # For example, if worker with index 3 belongs to tied worker group 1, # then worker_to_tied_group_index[3] = 1 @@ -287,6 +289,8 @@ def _create_workers_from_bundle_indices( "NODE_RANK": str(node_idx), } ) + if self.additional_env_vars: + env_vars.update(self.additional_env_vars) # For tensor parallel groups, only the first worker gets bundle_indices worker_bundle_indices = ( diff --git a/nemo_reinforcer/models/policy/__init__.py b/nemo_reinforcer/models/policy/__init__.py index ee2bf2389e..0e57b62ca1 100644 --- a/nemo_reinforcer/models/policy/__init__.py +++ b/nemo_reinforcer/models/policy/__init__.py @@ -25,3 +25,5 @@ class PolicyConfig(TypedDict): logprob_batch_size: int generation: GenerationConfig precision: str + expandable_segments_enabled: bool + fsdp_offload_enabled: bool diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py index a5fb7be767..ef4ec8460f 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_reinforcer/models/policy/hf_policy.py @@ -49,36 +49,6 @@ ) -def move_to_cpu(model): - return model - for param in model.parameters(): - param.data = param.data.to("cpu", non_blocking=True) - if hasattr(param, "_local_shard"): - param._local_shard = param.data - if param.grad is not None: - param.grad = param.grad.to("cpu", non_blocking=True) - - if hasattr(model, "_fsdp_wrapped_module"): - move_to_cpu(model._fsdp_wrapped_module) - - return model - - -def move_to_gpu(model): - return model - for param in model.parameters(): - param.data = param.data.to("cuda", non_blocking=True) - if hasattr(param, "_local_shard"): - param._local_shard = param.data - if param.grad is not None: - param.grad = param.grad.to("cuda", non_blocking=True) - - if hasattr(model, "_fsdp_wrapped_module"): - move_to_gpu(model._fsdp_wrapped_module) - - return model - - @ray.remote class HfPolicyWorker: DEFAULT_PY_EXECUTABLE = PY_EXECUTABLES.DEFAULT_VENV @@ -144,23 +114,29 @@ def do_fsdp(model): buffer_dtype=torch.float32, ) + cpu_offload = ( + CPUOffload(offload_params=True) + if self.cfg["fsdp_offload_enabled"] + else None + ) + return FullyShardedDataParallel( model, device_mesh=mesh, auto_wrap_policy=size_based_auto_wrap_policy, mixed_precision=mp_policy, - cpu_offload=CPUOffload(offload_params=True), + cpu_offload=cpu_offload, ) self.model.to("cuda") self.model = do_fsdp(self.model) - self.model = move_to_cpu(self.model) + self.model = self.manual_offload_to_cpu(self.model) self.reference_model.to("cuda") self.reference_model = do_fsdp(self.reference_model) - self.reference_model = move_to_cpu(self.reference_model) + self.reference_model = self.manual_offload_to_cpu(self.reference_model) - self.model = move_to_gpu(self.model) + self.model = self.manual_load_to_gpu(self.model) self._held_reference_model_params = None # register_fsdp_forward_method(self.model, "generate") if init_optimizer: @@ -211,13 +187,6 @@ def do_fsdp(model): "No weights path provided. 
Starting from scratch (default policy init)" ) - @staticmethod - def configure_worker( - num_gpus: int | float, bundle_indices: Optional[list] = None - ) -> tuple[dict, dict, dict]: - env_vars = {"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"} - return None, env_vars, None - def is_alive(self): return True @@ -463,8 +432,8 @@ def use_reference_model(self): original_model = self.model original_reference_model = self.reference_model - self.model = move_to_cpu(self.model) - self.reference_model = move_to_gpu(self.reference_model) + self.model = self.manual_offload_to_cpu(self.model) + self.reference_model = self.manual_load_to_gpu(self.reference_model) # Swap the references self.model, self.reference_model = self.reference_model, self.model @@ -477,8 +446,8 @@ def use_reference_model(self): finally: # Restore original references and device placement - self.reference_model = move_to_cpu(original_reference_model) - self.model = move_to_gpu(original_model) + self.reference_model = self.manual_offload_to_cpu(original_reference_model) + self.model = self.manual_load_to_gpu(original_model) gc.collect() torch.cuda.empty_cache() @@ -745,28 +714,28 @@ def get_weight_ipc_handles(self, offload_model=True): data[name] = reduce_tensor(p.detach()) if offload_model: - self.model = move_to_cpu(self.model) + self.model = self.manual_offload_to_cpu(self.model) gc.collect() torch.cuda.empty_cache() return {self.device_uuid: data} def prepare_for_lp_inference(self): - self.model = move_to_gpu(self.model) + self.model = self.manual_load_to_gpu(self.model) self.model.eval() self.offload_before_refit() def prepare_for_training(self, *args, **kwargs): # onload models and optimizer state to cuda - self.model = move_to_gpu(self.model) + self.model = self.manual_load_to_gpu(self.model) self.model.train() - # Move optimizer state to CUDA if it exists - if hasattr(self, "optimizer") and self.optimizer is not None: - for state in self.optimizer.state.values(): - for k, v in state.items(): - if torch.is_tensor(v) and not v.is_cuda: - ... - # state[k] = v.to("cuda") + if not self.cfg["fsdp_offload_enabled"]: + # Move optimizer state to CUDA if it exists + if hasattr(self, "optimizer") and self.optimizer is not None: + for state in self.optimizer.state.values(): + for k, v in state.items(): + if torch.is_tensor(v) and not v.is_cuda: + state[k] = v.to("cuda") torch.cuda.empty_cache() @@ -774,15 +743,15 @@ def prepare_for_training(self, *args, **kwargs): def offload_before_refit(self): """Offload the optimizer and buffers to the CPU.""" torch.randn(1).cuda() # wake up torch allocator - if hasattr(self, "optimizer") and self.optimizer is not None: - for state in self.optimizer.state.values(): - for k, v in state.items(): - if torch.is_tensor(v): - ... 
- # state[k] = v.to("cpu") + if not self.cfg["fsdp_offload_enabled"]: + if hasattr(self, "optimizer") and self.optimizer is not None: + for state in self.optimizer.state.values(): + for k, v in state.items(): + if torch.is_tensor(v): + state[k] = v.to("cpu") - # for buffer in self.model.buffers(): - # buffer.data = buffer.data.to("cpu") + for buffer in self.model.buffers(): + buffer.data = buffer.data.to("cpu") gc.collect() torch.cuda.empty_cache() @@ -797,7 +766,7 @@ def offload_before_refit(self): @torch.no_grad() def offload_after_refit(self): # Offload as much as possible on the CPU - self.model = move_to_cpu(self.model) + self.model = self.manual_offload_to_cpu(self.model) self.model.eval() torch.randn(1).cuda() # wake up torch allocator self.offload_before_refit() # rerun the old offload function @@ -815,6 +784,38 @@ def offload_after_refit(self): f"GPU Memory after refit complete: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved" ) + def manual_offload_to_cpu(self, model): + if self.cfg["fsdp_offload_enabled"]: + return model + + for param in model.parameters(): + param.data = param.data.to("cpu", non_blocking=True) + if hasattr(param, "_local_shard"): + param._local_shard = param.data + if param.grad is not None: + param.grad = param.grad.to("cpu", non_blocking=True) + + if hasattr(model, "_fsdp_wrapped_module"): + self.manual_offload_to_cpu(model._fsdp_wrapped_module) + + return model + + def manual_load_to_gpu(self, model): + if self.cfg["fsdp_offload_enabled"]: + return model + + for param in model.parameters(): + param.data = param.data.to("cuda", non_blocking=True) + if hasattr(param, "_local_shard"): + param._local_shard = param.data + if param.grad is not None: + param.grad = param.grad.to("cuda", non_blocking=True) + + if hasattr(model, "_fsdp_wrapped_module"): + self.manual_load_to_gpu(model._fsdp_wrapped_module) + + return model + def save_checkpoint( self, weights_path: str, @@ -921,11 +922,15 @@ def __init__( weights_path=weights_path, optimizer_path=optimizer_path, ) + additional_env_vars = {} + if config["expandable_segments_enabled"]: + additional_env_vars["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" self.worker_group = RayWorkerGroup( cluster, worker_builder, name_prefix=name_prefix, workers_per_node=workers_per_node, + additional_env_vars=additional_env_vars, ) self.dp_size = self.worker_group.world_size self.cfg = config From 11fbef755cce4045cfbea76746b30b790e12176f Mon Sep 17 00:00:00 2001 From: Yi-Fu Wu Date: Wed, 2 Apr 2025 04:06:16 -0700 Subject: [PATCH 10/27] expandable_segments and fsdp offload configs Signed-off-by: Yi-Fu Wu --- examples/configs/grpo_math_8B.yaml | 2 ++ examples/configs/sft.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/examples/configs/grpo_math_8B.yaml b/examples/configs/grpo_math_8B.yaml index d747e9249f..3408e84e32 100644 --- a/examples/configs/grpo_math_8B.yaml +++ b/examples/configs/grpo_math_8B.yaml @@ -9,6 +9,8 @@ policy: logprob_batch_size: 2 max_total_sequence_length: 4096 precision: "bfloat16" + expandable_segments_enabled: false + fsdp_offload_enabled: false optimizer: name: "torch.optim.AdamW" diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml index 1282285fc3..8bb0c7615b 100644 --- a/examples/configs/sft.yaml +++ b/examples/configs/sft.yaml @@ -22,6 +22,8 @@ policy: train_micro_batch_size: 1 max_total_sequence_length: 2048 precision: "float32" + expandable_segments_enabled: false + fsdp_offload_enabled: false optimizer: name: "torch.optim.AdamW" From 
ad48c1db004e7031b9e6c4571e9547d47a098a88 Mon Sep 17 00:00:00 2001 From: Yi-Fu Wu Date: Wed, 2 Apr 2025 04:06:42 -0700 Subject: [PATCH 11/27] Make port configurable in ray.sub Signed-off-by: Yi-Fu Wu --- ray.sub | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ray.sub b/ray.sub index 7258aec045..3fb8b76585 100644 --- a/ray.sub +++ b/ray.sub @@ -17,6 +17,7 @@ set -eoux pipefail CONTAINER=$CONTAINER MOUNTS=$MOUNTS COMMAND=${COMMAND:-} # This is a script relative to the SLURM_SUBMIT_DIR. If left empty, it will leave the cluster idle after it's brought up. +PORT=${PORT:-41993} ######################################################## COMMON_SRUN_ARGS="" @@ -54,7 +55,7 @@ done head_node=${nodes_array[0]} head_node_ip=${ip_addresses_array[0]} -port=41993 +port=$PORT ip_head=$head_node_ip:$port # First we start the head of the ray cluster on one of the physical nodes From 83bd55d38da8c8808a457b659a12584669eb92ae Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Wed, 26 Mar 2025 16:22:43 -0700 Subject: [PATCH 12/27] fix: error out early if ray cluster does not have resources (#89) Signed-off-by: Parth Chadha Signed-off-by: Yi-Fu Wu --- .../distributed/virtual_cluster.py | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/nemo_reinforcer/distributed/virtual_cluster.py b/nemo_reinforcer/distributed/virtual_cluster.py index 3c45697345..d909f17b6b 100644 --- a/nemo_reinforcer/distributed/virtual_cluster.py +++ b/nemo_reinforcer/distributed/virtual_cluster.py @@ -173,6 +173,30 @@ def _init_placement_groups(self, strategy: str): if self._node_placement_groups is not None: return self._node_placement_groups + # Check available resources in the Ray cluster + cluster_resources = ray.cluster_resources() + total_available_gpus = int(cluster_resources.get("GPU", 0)) + total_available_cpus = int(cluster_resources.get("CPU", 0)) + + # Calculate required resources + total_requested_gpus = ( + sum(self._bundle_ct_per_node_list) if self.use_gpus else 0 + ) + total_requested_cpus = ( + sum(self._bundle_ct_per_node_list) * self.max_colocated_worker_groups + ) + + # Validate resources + if self.use_gpus and total_requested_gpus > total_available_gpus: + raise ValueError( + f"Not enough GPUs available. Requested {total_requested_gpus} GPUs, but only {total_available_gpus} are available in the cluster." + ) + + if total_requested_cpus > total_available_cpus: + raise ValueError( + f"Not enough CPUs available. Requested {total_requested_cpus} CPUs, but only {total_available_cpus} are available in the cluster." + ) + num_cpus_per_bundle = self.max_colocated_worker_groups # num_gpus_per_bundle == 1 indicates that there is 1 GPU per process num_gpus_per_bundle = 1 if self.use_gpus else 0 @@ -192,7 +216,24 @@ def _init_placement_groups(self, strategy: str): for i, bundles in enumerate(resources) ] - ray.get([pg.ready() for pg in self._node_placement_groups]) + # Add timeout to prevent hanging indefinitely + try: + ray.get( + [pg.ready() for pg in self._node_placement_groups], timeout=180 + ) # 3-minute timeout + except (TimeoutError, ray.exceptions.GetTimeoutError): + # Clean up any created placement groups + for pg in self._node_placement_groups: + try: + remove_placement_group(pg) + except Exception: + pass + self._node_placement_groups = None + raise TimeoutError( + "Timed out waiting for placement groups to be ready. The cluster may not have enough resources " + "to satisfy the requested configuration, or the resources may be busy with other tasks." 
+ ) + return self._node_placement_groups def get_placement_groups(self): From f6a3d91a777814e2603668d692c9232ed31eaadc Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Wed, 26 Mar 2025 16:56:26 -0700 Subject: [PATCH 13/27] ci: skip functional until more capacity available and/or tests speed up (#94) Signed-off-by: Terry Kong Signed-off-by: Yi-Fu Wu --- .github/workflows/cicd-main.yml | 42 +++++++++++++++++---------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 2d2255e7f2..7902c02146 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -165,23 +165,25 @@ jobs: secrets: HF_TOKEN: ${{ secrets.HF_TOKEN }} - functional-tests: - name: ${{ matrix.test_case }} - needs: [build-container, pre-flight] - uses: ./.github/workflows/_run_test.yml - if: ${{ needs.pre-flight.outputs.run_ci == 'true' }} - strategy: - matrix: - test_case: - - sft.sh - - grpo.sh - with: - # TODO: For now, allow these to fail since the checks are not robust. - IS_OPTIONAL: true - RUNNER: self-hosted-azure - TIMEOUT: 15 - SCRIPT: | - cd ${REINFORCER_REPO_DIR} - uv run bash ./tests/functional/${{ matrix.test_case }} - secrets: - HF_TOKEN: ${{ secrets.HF_TOKEN }} \ No newline at end of file + # TODO: Temporarily disable functional tests until we have more capacity and tests run quicker + # Related: https://github.com/NVIDIA/reinforcer/pull/27 + #functional-tests: + # name: ${{ matrix.test_case }} + # needs: [build-container, pre-flight] + # uses: ./.github/workflows/_run_test.yml + # if: ${{ needs.pre-flight.outputs.run_ci == 'true' }} + # strategy: + # matrix: + # test_case: + # - sft.sh + # - grpo.sh + # with: + # # TODO: For now, allow these to fail since the checks are not robust. 
+ # IS_OPTIONAL: true + # RUNNER: self-hosted-azure + # TIMEOUT: 15 + # SCRIPT: | + # cd ${REINFORCER_REPO_DIR} + # uv run bash ./tests/functional/${{ matrix.test_case }} + # secrets: + # HF_TOKEN: ${{ secrets.HF_TOKEN }} \ No newline at end of file From 4eb1d6dcf25c1f053a82ed0a02b281be91201d93 Mon Sep 17 00:00:00 2001 From: yuki <48991475+yuki-666@users.noreply.github.com> Date: Thu, 27 Mar 2025 09:00:25 +0800 Subject: [PATCH 14/27] feat: evaluation implement (#16) Signed-off-by: Yuki Huang Signed-off-by: Yi-Fu Wu --- docs/guides/eval.md | 33 +++ docs/index.md | 1 + examples/configs/eval.yaml | 30 +++ examples/run_eval.py | 145 +++++++++++++ examples/run_grpo_math.py | 14 +- nemo_reinforcer/data/__init__.py | 5 + nemo_reinforcer/data/datasets.py | 51 +++++ nemo_reinforcer/data/llm_message_utils.py | 28 ++- nemo_reinforcer/evals/eval.py | 191 ++++++++++++++++++ nemo_reinforcer/evals/run_env_eval.py | 13 -- nemo_reinforcer/models/generation/vllm.py | 70 ++++++- .../models/generation/test_vllm_generation.py | 45 ++++- 12 files changed, 602 insertions(+), 24 deletions(-) create mode 100644 docs/guides/eval.md create mode 100644 examples/configs/eval.yaml create mode 100644 examples/run_eval.py create mode 100644 nemo_reinforcer/evals/eval.py delete mode 100644 nemo_reinforcer/evals/run_env_eval.py diff --git a/docs/guides/eval.md b/docs/guides/eval.md new file mode 100644 index 0000000000..8ac5ab5675 --- /dev/null +++ b/docs/guides/eval.md @@ -0,0 +1,33 @@ +# Evaluation + +## Start Evaluation + +### Start Script +```sh +# To run the evaluation with default config (examples/configs/eval.yaml) +uv run python examples/run_eval.py + +# Specify a custom config file +uv run python examples/run_eval.py --config path/to/custom_config.yaml + +# Override specific config values via command line +uv run python examples/run_eval.py generation.model_name="Qwen/Qwen2.5-Math-7B-Instruct" +``` + +### Example Output + +``` +============================================================ +model_name='Qwen2.5-Math-1.5B-Instruct' dataset_name='aime_2024' +score=0.10 (3.0/30) +============================================================ +``` + +## Configuration + +An example Evaluation configuration file can be found [here](../../examples/configs/eval.yaml). + +### Prompt Template Configuration +Always remember to use the same `prompt_file` and `system_prompt_file` that were used during training. + +For open-source models, we recommend setting `prompt_file=null` and `system_prompt_file=null` to allow them to use their native chat templates. 
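As an aside on the override mechanism documented above, the following is a minimal sketch of how dotlist-style CLI overrides merge into the base YAML config. It mirrors the `OmegaConf.from_dotlist`/`OmegaConf.merge` calls used by `examples/run_eval.py`; the config values here are illustrative only.

```python
# Sketch: merging CLI overrides such as `generation.model_name=...` into the
# base config (keys follow examples/configs/eval.yaml; values are examples).
from omegaconf import OmegaConf

base = OmegaConf.create({
    "generation": {"model_name": "Qwen/Qwen2.5-Math-1.5B-Instruct", "temperature": 0.0},
    "data": {"dataset_name": "HuggingFaceH4/aime_2024"},
})

# Each trailing CLI token becomes one dotlist entry.
overrides = OmegaConf.from_dotlist(["generation.model_name=Qwen/Qwen2.5-Math-7B-Instruct"])
merged = OmegaConf.merge(base, overrides)

assert merged.generation.model_name == "Qwen/Qwen2.5-Math-7B-Instruct"
assert merged.generation.temperature == 0.0  # keys not overridden keep base values
```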
diff --git a/docs/index.md b/docs/index.md
index 56cd64ac1b..0628f19953 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -18,6 +18,7 @@ cluster.md
 adding_new_models.md
 guides/sft.md
 guides/grpo.md
+guides/eval.md
 ```
 
 ```{toctree}
diff --git a/examples/configs/eval.yaml b/examples/configs/eval.yaml
new file mode 100644
index 0000000000..a867e9617f
--- /dev/null
+++ b/examples/configs/eval.yaml
@@ -0,0 +1,30 @@
+# Evaluation Configuration
+generation:
+  backend: "vllm" # only vllm is supported for evaluation
+  max_new_tokens: ${generation.vllm_cfg.max_model_len}
+  temperature: 0.0
+  top_p: 1.0
+  top_k: -1 # disable
+  num_prompts_per_step: -1 # -1 means pass all prompts at once
+  model_name: "Qwen/Qwen2.5-Math-1.5B-Instruct"
+  vllm_cfg:
+    tensor_parallel_size: 1
+    gpu_memory_utilization: 0.9
+    max_model_len: 2048
+
+data:
+  max_input_seq_length: ${generation.vllm_cfg.max_model_len} # unused here, since evaluation feeds prompt strings directly
+  prompt_file: null
+  system_prompt_file: null
+  dataset_name: "HuggingFaceH4/aime_2024"
+  dataset_key: "train"
+  problem_key: "problem"
+  solution_key: "answer"
+
+env:
+  math:
+    num_workers: 8
+
+cluster:
+  gpus_per_node: 1
+  num_nodes: 1
diff --git a/examples/run_eval.py b/examples/run_eval.py
new file mode 100644
index 0000000000..54358ad260
--- /dev/null
+++ b/examples/run_eval.py
@@ -0,0 +1,145 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +import argparse +import os +import pprint +import sys + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from datasets import load_dataset +from omegaconf import OmegaConf +from transformers import AutoTokenizer + +from examples.run_grpo_math import math_data_processor +from nemo_reinforcer.data import MathDataConfig +from nemo_reinforcer.data.datasets import AllTaskProcessedDataset +from nemo_reinforcer.data.interfaces import TaskDataSpec +from nemo_reinforcer.data.llm_message_utils import remap_dataset_keys +from nemo_reinforcer.distributed.virtual_cluster import init_ray +from nemo_reinforcer.environments.math_environment import MathEnvironment +from nemo_reinforcer.evals.eval import MasterConfig, run_env_eval, setup +from nemo_reinforcer.models.generation.interfaces import GenerationConfig + + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description="Run Evaluation with configuration") + parser.add_argument( + "--config", type=str, default=None, help="Path to YAML config file" + ) + + # Parse known args for the script + args, remaining = parser.parse_known_args() + + # Convert remaining args to OmegaConf format + overrides = OmegaConf.from_dotlist(remaining) + + return args, overrides + + +def setup_data( + data_config: MathDataConfig, generation_config: GenerationConfig, env_configs +): + print("\n▶ Setting up data...") + math_task_spec = TaskDataSpec( + task_name="math", + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + + # load dataset + base_dataset = load_dataset(data_config["dataset_name"]) + if data_config["dataset_key"] is not None: + base_dataset = base_dataset[data_config["dataset_key"]] + # remap problem and solution keys + remapped_dataset = remap_dataset_keys( + base_dataset, + mapping_dict={ + data_config["problem_key"]: "problem", + data_config["solution_key"]: "expected_answer", + }, + ) + + tokenizer = AutoTokenizer.from_pretrained(generation_config["model_name"]) + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + + math_env = MathEnvironment.options( + runtime_env={"py_executable": MathEnvironment.DEFAULT_PY_EXECUTABLE} + ).remote(env_configs["math"]) + + dataset = AllTaskProcessedDataset( + dataset=remapped_dataset, + tokenizer=tokenizer, + default_task_data_spec=math_task_spec, + task_data_processors=math_data_processor, + max_seq_length=data_config["max_input_seq_length"], + ) + + return dataset, math_env, tokenizer + + +def main(): + """Main entry point.""" + # Parse arguments + args, overrides = parse_args() + + if not args.config: + args.config = os.path.join(os.path.dirname(__file__), "configs", "eval.yaml") + + config = OmegaConf.load(args.config) + print(f"Loaded configuration from: {args.config}") + + if overrides: + override_conf = OmegaConf.from_cli() + print(f"Overrides: {override_conf}") + config = OmegaConf.merge(config, override_conf) + + config: MasterConfig = OmegaConf.to_container(config, resolve=True) + print("Applied CLI overrides") + + # Print config + print("Final config:") + pprint.pprint(config) + + # Init ray + init_ray() + + # Setup data + ( + dataset, + math_env, + tokenizer, + ) = setup_data(config["data"], config["generation"], config["env"]) + + # Setup + ( + vllm_generation, + dataloader, + master_config, + ) = setup(config, tokenizer, dataset) + + # Run evaluation + run_env_eval( + vllm_generation, + dataloader, + math_env, + master_config, + ) + + +if __name__ == 
"__main__": + main() diff --git a/examples/run_grpo_math.py b/examples/run_grpo_math.py index 7e2f3e693d..b87c7037b3 100644 --- a/examples/run_grpo_math.py +++ b/examples/run_grpo_math.py @@ -122,6 +122,8 @@ def math_data_processor( template = task_data_spec.custom_template message_log: LLMMessageLogType = [] + + # system prompt if task_data_spec.system_prompt: sys_message = {"role": "system", "content": task_data_spec.system_prompt} message = tokenizer.apply_chat_template( @@ -135,10 +137,11 @@ def math_data_processor( 0 ] message_log.append(sys_message) - user_message = { - "role": "user", - "content": task_data_spec.prompt.format(problem), - } + + # user prompt + if task_data_spec.prompt: + problem = task_data_spec.prompt.format(problem) + user_message = {"role": "user", "content": problem} message = tokenizer.apply_chat_template( [user_message], chat_template=template, @@ -167,8 +170,9 @@ def math_data_processor( "extra_env_info": extra_env_info, "loss_multiplier": loss_multiplier, "idx": idx, - "task_name": datum_dict["task_name"], } + if "task_name" in datum_dict: + output["task_name"] = datum_dict["task_name"] return output diff --git a/nemo_reinforcer/data/__init__.py b/nemo_reinforcer/data/__init__.py index 63aad516b2..09eaf35fb5 100644 --- a/nemo_reinforcer/data/__init__.py +++ b/nemo_reinforcer/data/__init__.py @@ -21,3 +21,8 @@ class DataConfig(TypedDict): system_prompt_file: Optional[str] dataset_name: str val_dataset_name: Optional[str] + + +class MathDataConfig(DataConfig): + problem_key: str + solution_key: str diff --git a/nemo_reinforcer/data/datasets.py b/nemo_reinforcer/data/datasets.py index 033cee05d7..8a81c85fb2 100644 --- a/nemo_reinforcer/data/datasets.py +++ b/nemo_reinforcer/data/datasets.py @@ -130,3 +130,54 @@ def rl_collate_fn(data_batch: List[DatumSpec]) -> BatchedDataDict: batch_max_length=batch_max_length, ) return output + + +def eval_collate_fn(data_batch: List[DatumSpec]) -> BatchedDataDict: + """Collate function for evaluation. + + Takes a list of data samples and combines them into a single batched dictionary + for model evaluation. + + Args: + data_batch: List of data samples with message_log, extra_env_info, and idx fields. + + Returns: + BatchedDataDict with message_log, extra_env_info, and idx fields. + + Examples: + ```{doctest} + >>> import torch + >>> from nemo_reinforcer.data.datasets import eval_collate_fn + >>> from nemo_reinforcer.data.interfaces import DatumSpec + >>> data_batch = [ + ... DatumSpec( + ... message_log=[{"role": "user", "content": "Hello", "token_ids": torch.tensor([1, 2, 3])}], + ... extra_env_info={'ground_truth': '1'}, + ... idx=0, + ... ), + ... DatumSpec( + ... message_log=[{"role": "assistant", "content": "Hi there", "token_ids": torch.tensor([4, 5, 6, 7])}], + ... extra_env_info={'ground_truth': '2'}, + ... idx=1, + ... ), + ... 
] + >>> output = eval_collate_fn(data_batch) + >>> output['message_log'][0] + [{'role': 'user', 'content': 'Hello', 'token_ids': tensor([1, 2, 3])}] + >>> output['message_log'][1] + [{'role': 'assistant', 'content': 'Hi there', 'token_ids': tensor([4, 5, 6, 7])}] + >>> output['extra_env_info'] + [{'ground_truth': '1'}, {'ground_truth': '2'}] + >>> output['idx'] + [0, 1] + """ + message_log = [datum_spec["message_log"] for datum_spec in data_batch] + extra_env_info = [datum_spec["extra_env_info"] for datum_spec in data_batch] + idx = [datum_spec["idx"] for datum_spec in data_batch] + + output = BatchedDataDict( + message_log=message_log, + extra_env_info=extra_env_info, + idx=idx, + ) + return output diff --git a/nemo_reinforcer/data/llm_message_utils.py b/nemo_reinforcer/data/llm_message_utils.py index 43e24fc1ce..5ae8bee9a8 100644 --- a/nemo_reinforcer/data/llm_message_utils.py +++ b/nemo_reinforcer/data/llm_message_utils.py @@ -11,10 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Union - +from typing import Dict, List import torch +from datasets import Dataset from nemo_reinforcer.data.interfaces import ( LLMMessageLogType, @@ -390,3 +390,27 @@ def get_formatted_message_log( prev_formatted_message = formatted_message return message_log + + +def remap_dataset_keys( + dataset: Dataset, + mapping_dict: Dict[str, str], +) -> Dataset: + """Remap dataset keys as per mapping. + + Args: + dataset: The input dataset to remap keys in + mapping_dict: A dictionary mapping input keys to output keys + + Returns: + Dataset: A new dataset with remapped keys + """ + # no need to remap if the keys are already correct + if all(k == v for k, v in mapping_dict.items()): + return dataset + + # return the remapped dataset + return dataset.map( + lambda x: {v: x[k] for k, v in mapping_dict.items()}, + remove_columns=list(mapping_dict.keys()), + ) diff --git a/nemo_reinforcer/evals/eval.py b/nemo_reinforcer/evals/eval.py new file mode 100644 index 0000000000..33d486a4d5 --- /dev/null +++ b/nemo_reinforcer/evals/eval.py @@ -0,0 +1,191 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+from typing import Tuple, TypedDict
+
+import ray
+from torch.utils.data import DataLoader
+from transformers import AutoTokenizer
+
+from nemo_reinforcer.data import MathDataConfig
+from nemo_reinforcer.data.datasets import AllTaskProcessedDataset, eval_collate_fn
+from nemo_reinforcer.data.llm_message_utils import get_keys_from_message_log
+from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict
+from nemo_reinforcer.distributed.virtual_cluster import ClusterConfig, RayVirtualCluster
+from nemo_reinforcer.environments.math_environment import MathEnvConfig
+from nemo_reinforcer.models.generation.interfaces import GenerationConfig
+from nemo_reinforcer.models.generation.vllm import VllmGeneration
+
+
+# ===============================================================================
+# Configuration
+# ===============================================================================
+
+
+class MasterConfig(TypedDict):
+    generation: GenerationConfig
+    data: MathDataConfig
+    env: MathEnvConfig
+    cluster: ClusterConfig
+
+
+# ===============================================================================
+# Setup & Initialization
+# ===============================================================================
+
+
+def setup(
+    master_config: MasterConfig,
+    tokenizer: AutoTokenizer,
+    dataset: AllTaskProcessedDataset,
+) -> Tuple[
+    VllmGeneration,
+    DataLoader,
+    MasterConfig,
+]:
+    """Set up components for model evaluation.
+
+    Initializes the vLLM model and data loader.
+
+    Args:
+        master_config: Configuration settings.
+        tokenizer: Tokenizer for the model being evaluated.
+        dataset: Dataset to evaluate on.
+
+    Returns:
+        vLLM model, data loader, and config.
+    """
+    # Extract individual configs for easier access
+    generation_config = master_config["generation"]
+    cluster_config = master_config["cluster"]
+
+    # ==========================
+    # Data
+    # ==========================
+    if generation_config["num_prompts_per_step"] == -1:
+        generation_config["num_prompts_per_step"] = len(dataset)
+    dataloader = DataLoader(
+        dataset,
+        batch_size=generation_config["num_prompts_per_step"],
+        shuffle=False,
+        collate_fn=eval_collate_fn,
+    )
+    print(f" ✓ Evaluation dataset loaded with {len(dataset)} samples")
+
+    # ==========================
+    # Cluster
+    # ==========================
+    print("\n▶ Setting up compute cluster...")
+    cluster = RayVirtualCluster(
+        name="eval_cluster",
+        bundle_ct_per_node_list=[cluster_config["gpus_per_node"]]
+        * cluster_config["num_nodes"],
+        use_gpus=True,
+        num_gpus_per_node=cluster_config["gpus_per_node"],
+        max_colocated_worker_groups=1,
+    )
+    print(f" ✓ Ray cluster initialized with {cluster_config['num_nodes']} nodes")
+
+    # ==========================
+    # Model
+    # ==========================
+    print("\n▶ Setting up model...")
+    # check backend
+    backend = generation_config["backend"]
+    assert backend == "vllm", "Only vLLM backend is supported for evaluation"
+
+    # set vllm config
+    generation_config["vllm_cfg"]["load_format"] = "auto"
+    generation_config["vllm_cfg"]["skip_tokenizer_init"] = False
+    generation_config["stop_token_ids"] = [tokenizer.eos_token_id]
+    generation_config["pad_token"] = tokenizer.pad_token_id
+
+    # initialize vllm generation
+    vllm_generation = VllmGeneration(cluster=cluster, config=generation_config)
+    print(
+        f" ✓ Using vLLM backend for generation with {generation_config['model_name']}"
+    )
+
+    print("\n" + "=" * 60)
+    print(" " * 18 + "SETUP COMPLETE")
+    print("=" * 60 + "\n")
+
+    return (
+        vllm_generation,
+        dataloader,
+        master_config,
+    )
+
+
+# 
=============================================================================== +# Evaluation +# =============================================================================== + + +def run_env_eval(vllm_generation, dataloader, env, master_config): + """Main entry point for running evaluation using environment. + + Generates model responses and evaluates them by env. + + Args: + vllm_generation: Model for generating responses. + dataloader: Data loader with evaluation samples. + env: Environment that scores responses. + master_config: Configuration settings. + """ + # Run evaluation loop + score, count = 0.0, 0 + for batch in dataloader: + # get input prompt from message_log + prompts = [] + for message_log in batch["message_log"]: + content = [message["content"] for message in message_log] + content = "\n".join(content) + prompts.append(content) + # generate by vllm + inputs = BatchedDataDict({"prompts": prompts}) + outputs = vllm_generation.generate_text(inputs)["texts"] + + # append to message_log + for idx, output in enumerate(outputs): + batch["message_log"][idx].append( + { + "role": "assistant", + "content": output, + } + ) + + # evaluate generations with the environment + to_env = [ + get_keys_from_message_log(batch["message_log"][i], ["role", "content"]) + for i in range(len(batch["message_log"])) + ] + _, _, rewards, _ = ray.get(env.step.remote(to_env, batch["extra_env_info"])) + + score += rewards.sum().item() + count += len(rewards) + + # Cleanup before printing results + ray.get(env.shutdown.remote()) + vllm_generation.shutdown() + + # Print results + dataset_name = os.path.basename(master_config["data"]["dataset_name"]) + model_name = os.path.basename(master_config["generation"]["model_name"]) + average_score = score / count + + print("\n" + "=" * 60) + print(f"{model_name=} {dataset_name=}") + print(f"score={average_score:.2f} ({score}/{count})") + print("=" * 60 + "\n") diff --git a/nemo_reinforcer/evals/run_env_eval.py b/nemo_reinforcer/evals/run_env_eval.py deleted file mode 100644 index 341a77c5bc..0000000000 --- a/nemo_reinforcer/evals/run_env_eval.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
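To make the prompt construction in `run_env_eval` concrete, here is a small self-contained sketch of the flattening step: each sample's `message_log` becomes a single prompt string by joining message contents with newlines (roles are not re-serialized at this point). The messages below are illustrative.

```python
# Sketch of the per-sample prompt flattening performed in run_env_eval above.
message_log = [
    {"role": "system", "content": "You are a helpful math assistant."},
    {"role": "user", "content": "What is 2 + 2?"},
]

prompt = "\n".join(message["content"] for message in message_log)
print(prompt)
# You are a helpful math assistant.
# What is 2 + 2?
```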
diff --git a/nemo_reinforcer/models/generation/vllm.py b/nemo_reinforcer/models/generation/vllm.py index cb60b7fe8c..2395b65e34 100644 --- a/nemo_reinforcer/models/generation/vllm.py +++ b/nemo_reinforcer/models/generation/vllm.py @@ -200,6 +200,7 @@ def generate( Args: data: BatchedDataDict containing input_ids and input_lengths tensors + greedy: Whether to use greedy decoding instead of sampling Returns: BatchedDataDict conforming to GenerationOutputSpec: @@ -246,7 +247,7 @@ def generate( # Read generation parameters from config top_k = self.cfg["top_k"] if self.cfg["top_k"] is not None else -1 sampling_params = self.SamplingParams( - temperature=self.cfg["temperature"], + temperature=self.cfg["temperature"] if not greedy else 0, top_p=self.cfg["top_p"], top_k=top_k if not greedy @@ -330,6 +331,37 @@ def generate( return return_data + def generate_text( + self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False + ) -> BatchedDataDict[GenerationOutputSpec]: + """Generate text responses using vLLM generation. + + Args: + data: BatchedDataDict containing prompts with text strings + greedy: Whether to use greedy decoding instead of sampling + + Returns: + BatchedDataDict containing: + - texts: List of generated text responses + """ + # Read generation parameters from config + top_k = self.cfg["top_k"] if self.cfg["top_k"] is not None else -1 + sampling_params = self.SamplingParams( + temperature=self.cfg["temperature"] if not greedy else 0, + top_p=self.cfg["top_p"], + top_k=top_k if not greedy else 1, + max_tokens=self.cfg["max_new_tokens"], + stop=self.cfg.get("stop_sequences", None), + ) + + # Generate outputs + outputs = self.llm.generate(data["prompts"], sampling_params) + texts = [output.outputs[0].text for output in outputs] + + # Convert to BatchedDataDict + return_data = BatchedDataDict({"texts": texts}) + return return_data + def shutdown(self): """Clean up vLLM resources.""" try: @@ -537,6 +569,42 @@ def generate( return combined + def generate_text( + self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False + ) -> BatchedDataDict[GenerationOutputSpec]: + """Generate text responses using vLLM.""" + assert isinstance(data, BatchedDataDict), ( + f"data must be a BatchedDataDict, got type: {type(data)}" + ) + + # Get total batch size + batch_size = len(data["prompts"]) + + # Shard the data across the tied worker groups + sharded_data = data.shard_by_batch_size(self.dp_size, batch_size=batch_size) + future_bundle = self.worker_group.run_all_workers_multiple_data( + "generate_text", + sharded_data, + common_kwargs={"greedy": greedy}, + respect_tied_workers=True, + ) + + # Get results from the workers, respecting tied worker groups (only one result per tied worker group) + results = self.worker_group.get_all_worker_results(future_bundle) + + # Combine results from all tied worker groups + combined = BatchedDataDict.from_batches(results) + + # Verify the output has all required fields + required_keys = ["texts"] + missing_keys = [key for key in required_keys if key not in combined] + if missing_keys: + raise ValueError( + f"Missing required keys for GenerationOutputSpec: {missing_keys}" + ) + + return combined + def prepare_for_generation(self, *args, **kwargs): """Abstract method that must be implemented by subclasses.""" try: diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index af8b5698e3..8c810e31dc 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ 
b/tests/unit/models/generation/test_vllm_generation.py @@ -40,10 +40,14 @@ } -def configure_vllm_with_tokenizer(vllm_config, tokenizer): +def configure_vllm_with_tokenizer(vllm_config, tokenizer, is_eval=False): """Apply tokenizer-specific configurations to vLLM config.""" - vllm_config["vllm_cfg"]["skip_tokenizer_init"] = True - vllm_config["vllm_cfg"]["load_format"] = "dummy" + if is_eval: + vllm_config["vllm_cfg"]["skip_tokenizer_init"] = False + vllm_config["vllm_cfg"]["load_format"] = "auto" + else: + vllm_config["vllm_cfg"]["skip_tokenizer_init"] = True + vllm_config["vllm_cfg"]["load_format"] = "dummy" vllm_config["pad_token"] = tokenizer.pad_token_id vllm_config["stop_token_ids"] = [tokenizer.eos_token_id] return vllm_config @@ -532,3 +536,38 @@ def test_vllm_policy_weight_update(cluster, tokenizer, tensor_parallel_size): # Clean up vllm_policy.shutdown() + + +def test_vllm_generate_text(cluster, tokenizer): + """Test that vLLM can generate text.""" + # Prepare test data + test_prompts = [ + "Hello, my name is", + "The capital of France is", + ] + test_prompts = BatchedDataDict({"prompts": test_prompts}) + + # Create separate configs for each policy + vllm_config = basic_vllm_test_config.copy() + vllm_config = configure_vllm_with_tokenizer(vllm_config, tokenizer, is_eval=True) + + # Ensure we can get same output + assert vllm_config["model_name"] == "meta-llama/Llama-3.2-1B", ( + "Model name should be meta-llama/Llama-3.2-1B to get expected output" + ) + assert vllm_config["vllm_cfg"]["tensor_parallel_size"] == 1, ( + "Tensor parallel size should be 1 to get expected output" + ) + + # Create vLLM generation + vllm_generation = VllmGeneration(cluster, vllm_config) + + # Generate and check result + output = vllm_generation.generate_text(test_prompts, greedy=True) + assert output["texts"] == [ + " Kelsey and I am a 2018 graduate", + " Paris. 
The city is located in the north of",
+    ], "Output should be the same as the expected output"
+
+    # Clean up
+    vllm_generation.shutdown()

From 8062086f60af0ca4ce7990eb8d4ce59ad8b3394c Mon Sep 17 00:00:00 2001
From: Parth Chadha
Date: Wed, 26 Mar 2025 21:03:49 -0700
Subject: [PATCH 15/27] fix: gradient should be averaged instead of summed across mbs (#86)

Signed-off-by: Parth Chadha
Signed-off-by: Yi-Fu Wu
---
 nemo_reinforcer/models/policy/hf_policy.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py
index ef4ec8460f..bc5d0c4a5d 100644
--- a/nemo_reinforcer/models/policy/hf_policy.py
+++ b/nemo_reinforcer/models/policy/hf_policy.py
@@ -278,6 +278,12 @@ def train(
         for gb_start in range(0, dataset_size, local_gbs):
             self.optimizer.zero_grad()
             mb_losses = []
+
+            # Calculate number of microbatches to process
+            # make_microbatch_iterator assumes that the batch size is a multiple of the microbatch size
+            # so it's safe to not check for the case where the last data slice is smaller than mbs
+            num_microbatches = min(local_gbs, dataset_size - gb_start) // mbs
+
             for mb in data.slice(
                 gb_start, gb_start + local_gbs
             ).make_microbatch_iterator(mbs):
@@ -308,6 +314,9 @@ def train(
                     loss_metrics["lr"] = self.optimizer.param_groups[0]["lr"]
 
                 # Backward pass
+
+                # Loss is accumulated across microbatches, so we need to scale by the number of microbatches
+                loss = loss / num_microbatches
                 if not eval_mode:
                     loss.backward()
                 mb_losses.append(loss.item())
@@ -320,7 +329,7 @@ def train(
                 # Update parameters
                 self.optimizer.step()
                 self.scheduler.step()
-            losses.append(torch.tensor(mb_losses).mean().item())
+            losses.append(torch.tensor(mb_losses).sum().item())
 
         # Compute global loss across all ranks
         with torch.no_grad():

From 1ce29010f9c242b78938046fa82b9e5e8872018a Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu
Date: Mon, 31 Mar 2025 09:15:56 -0700
Subject: [PATCH 16/27] fix: Use separate step_metric for GPU Monitoring (#92)

Signed-off-by: Yi-Fu Wu
Co-authored-by: Terry Kong
Signed-off-by: Yi-Fu Wu
---
 nemo_reinforcer/utils/logger.py |  89 ++++++++--
 tests/unit/conftest.py          |   4 +
 tests/unit/utils/test_logger.py | 304 ++++++++++++++++++++++++++++++--
 3 files changed, 372 insertions(+), 25 deletions(-)

diff --git a/nemo_reinforcer/utils/logger.py b/nemo_reinforcer/utils/logger.py
index 42143e7593..bc0157d564 100644
--- a/nemo_reinforcer/utils/logger.py
+++ b/nemo_reinforcer/utils/logger.py
@@ -68,7 +68,11 @@ class LoggerInterface(ABC):
     @abstractmethod
     def log_metrics(
-        self, metrics: Dict[str, Any], step: int, prefix: Optional[str] = ""
+        self,
+        metrics: Dict[str, Any],
+        step: int,
+        prefix: Optional[str] = "",
+        step_metric: Optional[str] = None,
     ) -> None:
         """Log a dictionary of metrics."""
         pass
@@ -87,7 +91,11 @@ def __init__(self, cfg: TensorboardConfig, log_dir: Optional[str] = None):
         print(f"Initialized TensorboardLogger at {log_dir}")
 
     def log_metrics(
-        self, metrics: Dict[str, Any], step: int, prefix: Optional[str] = ""
+        self,
+        metrics: Dict[str, Any],
+        step: int,
+        prefix: Optional[str] = "",
+        step_metric: Optional[str] = None,  # ignored in TensorBoard
     ) -> None:
         """Log metrics to Tensorboard.
@@ -95,6 +103,7 @@ def log_metrics( metrics: Dict of metrics to log step: Global step value prefix: Optional prefix for metric names + step_metric: Optional step metric name (ignored in TensorBoard) """ for name, value in metrics.items(): if prefix: @@ -120,8 +129,25 @@ def __init__(self, cfg: WandbConfig, log_dir: Optional[str] = None): f"Initialized WandbLogger for project {cfg.get('project')}, run {cfg.get('name')} at {log_dir}" ) + def define_metric( + self, + name: str, + step_metric: Optional[str] = None, + ) -> None: + """Define a metric with custom step metric. + + Args: + name: Name of the metric or pattern (e.g. 'ray/*') + step_metric: Optional name of the step metric to use + """ + self.run.define_metric(name, step_metric=step_metric) + def log_metrics( - self, metrics: Dict[str, Any], step: int, prefix: Optional[str] = "" + self, + metrics: Dict[str, Any], + step: int, + prefix: Optional[str] = "", + step_metric: Optional[str] = None, ) -> None: """Log metrics to wandb. @@ -129,11 +155,21 @@ def log_metrics( metrics: Dict of metrics to log step: Global step value prefix: Optional prefix for metric names + step_metric: Optional name of a field in metrics to use as step instead + of the provided step value """ if prefix: - metrics = {f"{prefix}/{k}": v for k, v in metrics.items()} - - self.run.log(metrics, step=step) + metrics = { + f"{prefix}/{k}" if k != step_metric else k: v + for k, v in metrics.items() + } + + # If step_metric is provided, use the corresponding value from metrics as step + if step_metric and step_metric in metrics: + # commit=False so the step does not get incremented + self.run.log(metrics, commit=False) + else: + self.run.log(metrics, step=step) def log_hyperparams(self, params: Dict[str, Any]) -> None: """Log hyperparameters to wandb. @@ -156,6 +192,8 @@ def __init__( self, collection_interval: int | float, flush_interval: int | float, + metric_prefix: str, + step_metric: str, parent_logger: Optional["Logger"] = None, ): """Initialize the GPU monitor. 
@@ -163,10 +201,13 @@ def __init__( Args: collection_interval: Interval in seconds to collect GPU metrics flush_interval: Interval in seconds to flush metrics to parent logger + step_metric: Name of the field to use as the step metric parent_logger: Logger to receive the collected metrics """ self.collection_interval = collection_interval self.flush_interval = flush_interval + self.metric_prefix = metric_prefix + self.step_metric = step_metric self.parent_logger = parent_logger self.metrics_buffer: list[ GpuMetricSnapshot @@ -425,7 +466,17 @@ def flush(self): for entry in self.metrics_buffer: step = entry["step"] metrics = entry["metrics"] - self.parent_logger.log_metrics(metrics, step, prefix="ray") + + # Add the step metric directly to metrics for use as step_metric + metrics[self.step_metric] = step + + # Pass step_metric as the step_metric to use it as the step value in wandb + self.parent_logger.log_metrics( + metrics, + step=step, + prefix=self.metric_prefix, + step_metric=self.step_metric, + ) # Clear buffer after logging self.metrics_buffer = [] @@ -448,6 +499,7 @@ def __init__(self, cfg: LoggerConfig): - gpu_flush_interval """ self.loggers = [] + self.wandb_logger = None self.base_log_dir = cfg["log_dir"] os.makedirs(self.base_log_dir, exist_ok=True) @@ -455,8 +507,8 @@ def __init__(self, cfg: LoggerConfig): if cfg["wandb_enabled"]: wandb_log_dir = os.path.join(self.base_log_dir, "wandb") os.makedirs(wandb_log_dir, exist_ok=True) - wandb_logger = WandbLogger(cfg["wandb"], log_dir=wandb_log_dir) - self.loggers.append(wandb_logger) + self.wandb_logger = WandbLogger(cfg["wandb"], log_dir=wandb_log_dir) + self.loggers.append(self.wandb_logger) if cfg["tensorboard_enabled"]: tensorboard_log_dir = os.path.join(self.base_log_dir, "tensorboard") @@ -469,9 +521,18 @@ def __init__(self, cfg: LoggerConfig): # Initialize GPU monitoring if requested self.gpu_monitor = None if cfg["monitor_gpus"]: + metric_prefix = "ray" + step_metric = f"{metric_prefix}/ray_step" + if cfg["wandb_enabled"] and self.wandb_logger: + self.wandb_logger.define_metric( + f"{metric_prefix}/*", step_metric=step_metric + ) + self.gpu_monitor = RayGpuMonitorLogger( collection_interval=cfg["gpu_monitoring"]["collection_interval"], flush_interval=cfg["gpu_monitoring"]["flush_interval"], + metric_prefix=metric_prefix, + step_metric=step_metric, parent_logger=self, ) self.gpu_monitor.start() @@ -480,7 +541,11 @@ def __init__(self, cfg: LoggerConfig): print("No loggers initialized") def log_metrics( - self, metrics: Dict[str, Any], step: int, prefix: Optional[str] = "" + self, + metrics: Dict[str, Any], + step: int, + prefix: Optional[str] = "", + step_metric: Optional[str] = None, ) -> None: """Log metrics to all enabled backends. @@ -488,9 +553,11 @@ def log_metrics( metrics: Dict of metrics to log step: Global step value prefix: Optional prefix for metric names + step_metric: Optional name of a field in metrics to use as step instead + of the provided step value (currently only needed for wandb) """ for logger in self.loggers: - logger.log_metrics(metrics, step, prefix) + logger.log_metrics(metrics, step, prefix, step_metric) def log_hyperparams(self, params: Dict[str, Any]) -> None: """Log hyperparameters to all enabled backends. 
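For context on the wandb behavior the logger changes above rely on, here is a minimal sketch using the same two public wandb APIs the diff calls (`define_metric` and `log(..., commit=False)`); the project name and metric values are illustrative.

```python
import wandb

run = wandb.init(project="grpo-dev")

# Metrics matching "ray/*" are plotted against the custom "ray/ray_step"
# field instead of wandb's global step.
run.define_metric("ray/*", step_metric="ray/ray_step")

# A GPU-monitor flush later supplies its own step value; commit=False keeps
# this out-of-band log call from incrementing the global step.
run.log({"ray/node.0.gpu.0.gpu": 75.5, "ray/ray_step": 10}, commit=False)
```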
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 1dbbd6a169..6056effa57 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -113,6 +113,8 @@ def session_data(request, init_ray_cluster): logger = RayGpuMonitorLogger( collection_interval=float("inf"), flush_interval=float("inf"), + metric_prefix="test", + step_metric="test/step", parent_logger=None, ) unit_test_data["gpu_types"] = list(set(logger._collect_gpu_sku().values())) @@ -209,6 +211,8 @@ def ray_gpu_monitor(init_ray_cluster): gpu_monitor = RayGpuMonitorLogger( collection_interval=1, flush_interval=float("inf"), # Disabling flushing since we will do it manually + metric_prefix="test", + step_metric="test/step", parent_logger=None, ) gpu_monitor.start() diff --git a/tests/unit/utils/test_logger.py b/tests/unit/utils/test_logger.py index dd1c24fb27..ac2ce9cc43 100644 --- a/tests/unit/utils/test_logger.py +++ b/tests/unit/utils/test_logger.py @@ -186,6 +186,67 @@ def test_log_metrics_with_prefix(self, mock_wandb): expected_metrics = {"train/loss": 0.5, "train/accuracy": 0.8} mock_run.log.assert_called_once_with(expected_metrics, step=step) + @patch("nemo_reinforcer.utils.logger.wandb") + def test_log_metrics_with_step_metric(self, mock_wandb): + """Test logging metrics with a step metric to WandbLogger.""" + cfg = {} + logger = WandbLogger(cfg) + + # Define step metric + step_metric = "iteration" + + # Include the step metric in the metrics + metrics = {"loss": 0.5, "accuracy": 0.8, "iteration": 15} + step = 10 # This should be ignored when step_metric is provided + + logger.log_metrics(metrics, step, step_metric=step_metric) + + # Check that log was called with metrics and commit=False + # When using step_metric, step should be ignored and commit=False should be used + mock_run = mock_wandb.init.return_value + mock_run.log.assert_called_once_with(metrics, commit=False) + + @patch("nemo_reinforcer.utils.logger.wandb") + def test_log_metrics_with_prefix_and_step_metric(self, mock_wandb): + """Test logging metrics with both prefix and step metric.""" + cfg = {} + logger = WandbLogger(cfg) + + # Define prefix and step metric + prefix = "train" + step_metric = "train/iteration" + + # Include the step metric in the metrics + metrics = {"loss": 0.5, "accuracy": 0.8, "iteration": 15} + step = 10 # This should be ignored when step_metric is provided + + logger.log_metrics(metrics, step, prefix=prefix, step_metric=step_metric) + + # Check that log was called with prefixed metrics and commit=False + # The step_metric key gets prefixed based on the current implementation + mock_run = mock_wandb.init.return_value + expected_metrics = { + "train/loss": 0.5, + "train/accuracy": 0.8, + "train/iteration": 15, + } + mock_run.log.assert_called_once_with(expected_metrics, commit=False) + + @patch("nemo_reinforcer.utils.logger.wandb") + def test_define_metric(self, mock_wandb): + """Test defining a metric with a custom step metric.""" + cfg = {} + logger = WandbLogger(cfg) + + # Define metric pattern and step metric + logger.define_metric("ray/*", step_metric="ray/ray_step") + + # Check that define_metric was called + mock_run = mock_wandb.init.return_value + mock_run.define_metric.assert_called_once_with( + "ray/*", step_metric="ray/ray_step" + ) + @patch("nemo_reinforcer.utils.logger.wandb") def test_log_hyperparams(self, mock_wandb): """Test logging hyperparameters to WandbLogger.""" @@ -219,11 +280,13 @@ def __init__(self): self.logged_metrics = [] self.logged_steps = [] self.logged_prefixes = [] + 
self.logged_step_metrics = [] - def log_metrics(self, metrics, step, prefix=""): + def log_metrics(self, metrics, step, prefix="", step_metric=None): self.logged_metrics.append(metrics) self.logged_steps.append(step) self.logged_prefixes.append(prefix) + self.logged_step_metrics.append(step_metric) return MockLogger() @@ -235,12 +298,18 @@ def test_init(self, mock_ray): # Initialize the monitor with standard settings monitor = RayGpuMonitorLogger( - collection_interval=10.0, flush_interval=60.0, parent_logger=None + collection_interval=10.0, + flush_interval=60.0, + metric_prefix="test", + step_metric="test/step", + parent_logger=None, ) # Verify initialization parameters assert monitor.collection_interval == 10.0 assert monitor.flush_interval == 60.0 + assert monitor.metric_prefix == "test" + assert monitor.step_metric == "test/step" assert monitor.parent_logger is None assert monitor.metrics_buffer == [] assert monitor.is_running is False @@ -255,7 +324,11 @@ def test_start(self, mock_thread, mock_ray): # Initialize the monitor monitor = RayGpuMonitorLogger( - collection_interval=10.0, flush_interval=60.0, parent_logger=None + collection_interval=10.0, + flush_interval=60.0, + metric_prefix="test", + step_metric="test/step", + parent_logger=None, ) # Start the monitor @@ -277,7 +350,11 @@ def test_start_ray_not_initialized(self, mock_ray): # Initialize the monitor monitor = RayGpuMonitorLogger( - collection_interval=10.0, flush_interval=60.0, parent_logger=None + collection_interval=10.0, + flush_interval=60.0, + metric_prefix="test", + step_metric="test/step", + parent_logger=None, ) # Starting should raise a ValueError @@ -293,7 +370,11 @@ def test_stop(self, mock_thread, mock_ray): # Initialize the monitor monitor = RayGpuMonitorLogger( - collection_interval=10.0, flush_interval=60.0, parent_logger=None + collection_interval=10.0, + flush_interval=60.0, + metric_prefix="test", + step_metric="test/step", + parent_logger=None, ) # Start the monitor @@ -318,7 +399,11 @@ def test_parse_gpu_metric(self, mock_ray): # Initialize the monitor monitor = RayGpuMonitorLogger( - collection_interval=10.0, flush_interval=60.0, parent_logger=None + collection_interval=10.0, + flush_interval=60.0, + metric_prefix="test", + step_metric="test/step", + parent_logger=None, ) # Create a sample with GPU utilization metric @@ -405,7 +490,11 @@ def test_fetch_and_parse_metrics(self, mock_get, mock_ray): # Initialize the monitor monitor = RayGpuMonitorLogger( - collection_interval=10.0, flush_interval=60.0, parent_logger=None + collection_interval=10.0, + flush_interval=60.0, + metric_prefix="test", + step_metric="test/step", + parent_logger=None, ) # Mock the _parse_gpu_metric method to return expected values @@ -447,7 +536,11 @@ def test_collect_metrics(self, mock_ray): # Initialize the monitor monitor = RayGpuMonitorLogger( - collection_interval=10.0, flush_interval=60.0, parent_logger=None + collection_interval=10.0, + flush_interval=60.0, + metric_prefix="test", + step_metric="test/step", + parent_logger=None, ) # Mock the _fetch_and_parse_metrics method @@ -483,6 +576,8 @@ def test_flush_empty_buffer(self, mock_ray, mock_parent_logger): monitor = RayGpuMonitorLogger( collection_interval=10.0, flush_interval=60.0, + metric_prefix="ray", + step_metric="ray/ray_step", parent_logger=mock_parent_logger, ) @@ -502,6 +597,8 @@ def test_flush(self, mock_ray, mock_parent_logger): monitor = RayGpuMonitorLogger( collection_interval=10.0, flush_interval=60.0, + metric_prefix="ray", + step_metric="ray/ray_step", 
parent_logger=mock_parent_logger, ) @@ -522,23 +619,68 @@ def test_flush(self, mock_ray, mock_parent_logger): # Verify parent logger's log_metrics was called for each entry assert len(mock_parent_logger.logged_metrics) == 2 - assert mock_parent_logger.logged_metrics[0] == { + + # First metrics entry should include the step metric + expected_first_metrics = { "node.0.gpu.0.gpu": 75.5, "node.0.gpu.0.memory": 4096.0, + "ray/ray_step": 10, # Step metric added } + assert mock_parent_logger.logged_metrics[0] == expected_first_metrics assert mock_parent_logger.logged_steps[0] == 10 assert mock_parent_logger.logged_prefixes[0] == "ray" + assert mock_parent_logger.logged_step_metrics[0] == "ray/ray_step" - assert mock_parent_logger.logged_metrics[1] == { + # Second metrics entry should include the step metric + expected_second_metrics = { "node.0.gpu.0.gpu": 80.0, "node.0.gpu.0.memory": 5120.0, + "ray/ray_step": 20, # Step metric added } + assert mock_parent_logger.logged_metrics[1] == expected_second_metrics assert mock_parent_logger.logged_steps[1] == 20 assert mock_parent_logger.logged_prefixes[1] == "ray" + assert mock_parent_logger.logged_step_metrics[1] == "ray/ray_step" # Verify buffer was cleared assert monitor.metrics_buffer == [] + @patch("nemo_reinforcer.utils.logger.ray") + def test_flush_with_custom_prefix(self, mock_ray, mock_parent_logger): + """Test flush method with custom metric prefix.""" + # Mock ray.is_initialized to return True + mock_ray.is_initialized.return_value = True + + # Initialize the monitor with parent logger and custom prefix + custom_prefix = "custom_metrics" + custom_step_metric = "custom_metrics/step" + monitor = RayGpuMonitorLogger( + collection_interval=10.0, + flush_interval=60.0, + metric_prefix=custom_prefix, + step_metric=custom_step_metric, + parent_logger=mock_parent_logger, + ) + + # Add test metrics to buffer + monitor.metrics_buffer = [ + { + "step": 15, + "metrics": {"node.0.gpu.0.gpu": 60.0}, + } + ] + + # Call flush + monitor.flush() + + # Verify parent logger's log_metrics was called with the custom prefix + assert len(mock_parent_logger.logged_metrics) == 1 + expected_metrics = {"node.0.gpu.0.gpu": 60.0, "custom_metrics/step": 15} + assert mock_parent_logger.logged_metrics[0] == expected_metrics + assert mock_parent_logger.logged_steps[0] == 15 + assert mock_parent_logger.logged_prefixes[0] == custom_prefix + assert mock_parent_logger.logged_step_metrics[0] == custom_step_metric + @patch("nemo_reinforcer.utils.logger.ray") @patch("nemo_reinforcer.utils.logger.time") def test_collection_loop(self, mock_time, mock_ray): @@ -556,7 +698,11 @@ def test_collection_loop(self, mock_time, mock_ray): # Initialize the monitor monitor = RayGpuMonitorLogger( - collection_interval=10.0, flush_interval=60.0, parent_logger=None + collection_interval=10.0, + flush_interval=60.0, + metric_prefix="test", + step_metric="test/step", + parent_logger=None, ) # Set start time and running flag @@ -593,6 +739,89 @@ def side_effect(): # Verify flush was called (flush_interval elapsed) mock_flush.assert_called_once() + @patch("nemo_reinforcer.utils.logger.WandbLogger") + @patch("nemo_reinforcer.utils.logger.TensorboardLogger") + @patch("nemo_reinforcer.utils.logger.RayGpuMonitorLogger") + def test_init_with_gpu_monitoring( + self, mock_gpu_monitor, mock_tb_logger, mock_wandb_logger, temp_dir + ): + """Test initialization with GPU monitoring enabled.""" + cfg = { + "wandb_enabled": True, + "tensorboard_enabled": True, + "monitor_gpus": True, + "gpu_monitoring": { + 
"collection_interval": 15.0, + "flush_interval": 45.0, + }, + "wandb": {"project": "test-project"}, + "tensorboard": {"log_dir": "test_logs"}, + "log_dir": temp_dir, + } + logger = Logger(cfg) + + # Check that regular loggers were initialized + assert len(logger.loggers) == 2 + mock_wandb_logger.assert_called_once() + mock_tb_logger.assert_called_once() + + # Check that GPU monitor was initialized with correct parameters + mock_gpu_monitor.assert_called_once_with( + collection_interval=15.0, + flush_interval=45.0, + metric_prefix="ray", + step_metric="ray/ray_step", + parent_logger=logger, + ) + + # Check that GPU monitor was started + mock_gpu_instance = mock_gpu_monitor.return_value + mock_gpu_instance.start.assert_called_once() + + # Check that wandb metrics are defined with the step metric + mock_wandb_instance = mock_wandb_logger.return_value + mock_wandb_instance.define_metric.assert_called_once_with( + "ray/*", step_metric="ray/ray_step" + ) + + @patch("nemo_reinforcer.utils.logger.WandbLogger") + @patch("nemo_reinforcer.utils.logger.TensorboardLogger") + @patch("nemo_reinforcer.utils.logger.RayGpuMonitorLogger") + def test_gpu_monitoring_without_wandb( + self, mock_gpu_monitor, mock_tb_logger, mock_wandb_logger, temp_dir + ): + """Test GPU monitoring initialization when wandb is disabled.""" + cfg = { + "wandb_enabled": False, + "tensorboard_enabled": True, + "monitor_gpus": True, + "gpu_monitoring": { + "collection_interval": 15.0, + "flush_interval": 45.0, + }, + "tensorboard": {"log_dir": "test_logs"}, + "log_dir": temp_dir, + } + logger = Logger(cfg) + + # Check that only tensorboard logger was initialized + assert len(logger.loggers) == 1 + mock_wandb_logger.assert_not_called() + mock_tb_logger.assert_called_once() + + # Check that GPU monitor was initialized with correct parameters + mock_gpu_monitor.assert_called_once_with( + collection_interval=15.0, + flush_interval=45.0, + metric_prefix="ray", + step_metric="ray/ray_step", + parent_logger=logger, + ) + + # Since wandb is disabled, define_metric should not be called + mock_wandb_instance = mock_wandb_logger.return_value + assert not mock_wandb_instance.define_metric.called + class TestLogger: """Test the main Logger class.""" @@ -704,8 +933,8 @@ def test_log_metrics(self, mock_tb_logger, mock_wandb_logger, temp_dir): logger.log_metrics(metrics, step) # Check that log_metrics was called on both loggers - mock_wandb_instance.log_metrics.assert_called_once_with(metrics, step, "") - mock_tb_instance.log_metrics.assert_called_once_with(metrics, step, "") + mock_wandb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) + mock_tb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) @patch("nemo_reinforcer.utils.logger.WandbLogger") @patch("nemo_reinforcer.utils.logger.TensorboardLogger") @@ -760,9 +989,56 @@ def test_init_with_gpu_monitoring( # Check that GPU monitor was initialized with correct parameters mock_gpu_monitor.assert_called_once_with( - collection_interval=15.0, flush_interval=45.0, parent_logger=logger + collection_interval=15.0, + flush_interval=45.0, + metric_prefix="ray", + step_metric="ray/ray_step", + parent_logger=logger, ) # Check that GPU monitor was started mock_gpu_instance = mock_gpu_monitor.return_value mock_gpu_instance.start.assert_called_once() + + # Check that wandb metrics are defined with the step metric + mock_wandb_instance = mock_wandb_logger.return_value + mock_wandb_instance.define_metric.assert_called_once_with( + "ray/*", step_metric="ray/ray_step" + ) + + 
@patch("nemo_reinforcer.utils.logger.WandbLogger")
+    @patch("nemo_reinforcer.utils.logger.TensorboardLogger")
+    def test_log_metrics_with_prefix_and_step_metric(
+        self, mock_tb_logger, mock_wandb_logger, temp_dir
+    ):
+        """Test logging metrics with prefix and step_metric."""
+        cfg = {
+            "wandb_enabled": True,
+            "tensorboard_enabled": True,
+            "monitor_gpus": False,
+            "wandb": {"project": "test-project"},
+            "tensorboard": {"log_dir": "test_logs"},
+            "log_dir": temp_dir,
+        }
+        logger = Logger(cfg)
+
+        # Create mock logger instances
+        mock_wandb_instance = mock_wandb_logger.return_value
+        mock_tb_instance = mock_tb_logger.return_value
+
+        # Create metrics with a step metric field
+        metrics = {"loss": 0.5, "accuracy": 0.8, "iteration": 15}
+        step = 10
+        prefix = "train"
+        step_metric = "iteration"
+
+        # Log metrics with prefix and step_metric
+        logger.log_metrics(metrics, step, prefix=prefix, step_metric=step_metric)
+
+        # Check that log_metrics was called on both loggers with correct parameters
+        mock_wandb_instance.log_metrics.assert_called_once_with(
+            metrics, step, prefix, step_metric
+        )
+        mock_tb_instance.log_metrics.assert_called_once_with(
+            metrics, step, prefix, step_metric
+        )

From e93d6979821fedee49da74f3165e9189008f2118 Mon Sep 17 00:00:00 2001
From: Anna Shors
Date: Mon, 31 Mar 2025 11:30:57 -0700
Subject: [PATCH 17/27] feat: Update sft config to use single GPU (#90)

Signed-off-by: ashors1
Signed-off-by: Oleksii Kuchaiev
Co-authored-by: Oleksii Kuchaiev
Co-authored-by: Terry Kong
Signed-off-by: Yi-Fu Wu
---
 README.md                 | 14 ++++++++------
 examples/configs/sft.yaml | 27 ++++++++++-----------------
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index ded81200f7..aeab19f33c 100644
--- a/README.md
+++ b/README.md
@@ -60,20 +60,22 @@ We provide a sample SFT experiment that uses the [SQuAD dataset](https://rajpurk
 
 #### Single Node
 
-The experiment is set up to run on 8 GPUs. If using a machine that has access to 8 GPUs, you can launch the experiment as follows:
+The default SFT experiment is configured to run on a single GPU. To launch the experiment, run:
 
 ```sh
 uv run python examples/run_sft.py
 ```
 
-This trains `Llama3.1-8B` on 8 GPUs. To run on a single GPU, we'll have to override a few of the experiment settings. We replace the 8B model with a smaller 1B model, decrease the batch size, and update the cluster configuration to use a single gpu:
+This trains `Llama3.2-1B` on one GPU using SQUAD dataset.
+
+If you have access to more GPUs, you can update the experiment accordingly. To run on 8 GPUs, we update the cluster configuration. We also switch to an 8B Llama base model and increase the batch size:
 
 ```sh
 uv run python examples/run_sft.py \
-    policy.model_name="meta-llama/Llama-3.2-1B" \
-    policy.train_global_batch_size=16 \
-    sft.val_global_batch_size=16 \
-    cluster.gpus_per_node=1
+    policy.model_name="meta-llama/Meta-Llama-3-8B" \
+    policy.train_global_batch_size=128 \
+    sft.val_global_batch_size=128 \
+    cluster.gpus_per_node=8
 ```
 
 Refer to [sft.yaml](examples/configs/sft.yaml) for a full list of parameters that can be overridden.
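For context on how these dotted-path overrides are applied: at this point in the series the example scripts merge them as plain OmegaConf dotlist entries (a later patch in this series, #80, replaces that with Hydra-style override parsing). Below is a minimal sketch of the dotlist mechanism; the config values are illustrative stand-ins for sft.yaml, not loaded from it:

```python
from omegaconf import OmegaConf

# Stand-in for the loaded sft.yaml config (values illustrative).
base = OmegaConf.create(
    {
        "policy": {"model_name": "meta-llama/Llama-3.2-1B", "train_global_batch_size": 32},
        "sft": {"val_global_batch_size": 32},
        "cluster": {"gpus_per_node": 1, "num_nodes": 1},
    }
)

# Each CLI token such as "cluster.gpus_per_node=8" becomes one dotlist entry.
overrides = OmegaConf.from_dotlist(
    [
        "policy.model_name=meta-llama/Meta-Llama-3-8B",
        "policy.train_global_batch_size=128",
        "sft.val_global_batch_size=128",
        "cluster.gpus_per_node=8",
    ]
)

# Later keys win, so the merged config reflects the CLI overrides.
merged = OmegaConf.merge(base, overrides)
assert merged.cluster.gpus_per_node == 8
assert merged.policy.model_name == "meta-llama/Meta-Llama-3-8B"
```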
diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml index 8bb0c7615b..c028e83dc2 100644 --- a/examples/configs/sft.yaml +++ b/examples/configs/sft.yaml @@ -1,9 +1,9 @@ # SFT Algorithm Configuration sft: - max_num_steps: 1000 + max_num_steps: 60 val_period: 10 val_batches: 8 - val_global_batch_size: 128 + val_global_batch_size: 32 val_micro_batch_size: 1 val_at_start: true seed: 42 @@ -17,10 +17,10 @@ checkpointing: save_period: 10 policy: - model_name: "meta-llama/Meta-Llama-3-8B" - train_global_batch_size: 128 + model_name: "meta-llama/Llama-3.2-1B" + train_global_batch_size: 32 train_micro_batch_size: 1 - max_total_sequence_length: 2048 + max_total_sequence_length: 1024 precision: "float32" expandable_segments_enabled: false fsdp_offload_enabled: false @@ -32,13 +32,6 @@ policy: weight_decay: 0.1 betas: [0.9, 0.98] eps: 1e-5 - - scheduler: - name: "torch.optim.lr_scheduler.LinearLR" - kwargs: - start_factor: 0.0196078 - end_factor: 1.0 - total_iters: 50 data: max_input_seq_length: ${policy.max_total_sequence_length} @@ -46,18 +39,18 @@ data: logger: log_dir: "logs" # Base directory for all logs - wandb_enabled: false - tensorboard_enabled: false + wandb_enabled: true # Make sure you do ``wandb login [Your API key]'' before run + tensorboard_enabled: true monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "sft-dev" - name: "sft-dev-logger" + name: "sft-dev-${data.dataset_name}" tensorboard: - log_dir: "tb_logs" + log_dir: "tb_logs-sft-dev-${data.dataset_name}" gpu_monitoring: collection_interval: 10 # How often to collect GPU usage metrics (in seconds) flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) cluster: - gpus_per_node: 8 + gpus_per_node: 1 num_nodes: 1 From c0aa9896dbe694046e65d90fb2acefb4819dd1bb Mon Sep 17 00:00:00 2001 From: Sahil Jain <48468750+SahilJain314@users.noreply.github.com> Date: Mon, 31 Mar 2025 11:42:48 -0700 Subject: [PATCH 18/27] fix: Grammar nit (#98) Signed-off-by: Sahil Jain Signed-off-by: Yi-Fu Wu --- README.md | 2 +- examples/configs/sft.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index aeab19f33c..4b0b5cbba9 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ The default SFT experiment is configured to run on a single GPU. To launch the e uv run python examples/run_sft.py ``` -This trains `Llama3.2-1B` on one GPU using SQUAD dataset. +This trains `Llama3.2-1B` on one GPU using the SQUAD dataset. If you have access to more GPUs, you can update the experiment accordingly. To run on 8 GPUs, we update the cluster configuration. 
We also switch to an 8B Llama base model and increase the batch size: diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml index c028e83dc2..3b20e4cc93 100644 --- a/examples/configs/sft.yaml +++ b/examples/configs/sft.yaml @@ -39,7 +39,7 @@ data: logger: log_dir: "logs" # Base directory for all logs - wandb_enabled: true # Make sure you do ``wandb login [Your API key]'' before run + wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running tensorboard_enabled: true monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: From e27b5fd6b7a341aec09bf3e0e2b4efd0eddd1293 Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Mon, 31 Mar 2025 12:11:37 -0700 Subject: [PATCH 19/27] =?UTF-8?q?feat:=20add=20capability=20to=20set=20min?= =?UTF-8?q?/max=20eps=20separately=20as=20proposed=20in=20the=E2=80=A6=20(?= =?UTF-8?q?#95)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Parth Chadha Co-authored-by: Sahil Jain <48468750+SahilJain314@users.noreply.github.com> Signed-off-by: Yi-Fu Wu --- .gitignore | 1 + examples/configs/grpo_math_1B.yaml | 3 ++- nemo_reinforcer/algorithms/loss_functions.py | 14 +++++++++++--- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 79a00631e6..0d7a81c424 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,4 @@ docker/ wandb/ checkpoints/ results/ +code_snapshots/ diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml index f15031c536..a7cb644114 100644 --- a/examples/configs/grpo_math_1B.yaml +++ b/examples/configs/grpo_math_1B.yaml @@ -12,7 +12,8 @@ grpo: loss_fn: reference_policy_kl_penalty: 0.01 - ratio_eps: 0.2 + ratio_eps_min: 0.2 + ratio_eps_max: 0.2 checkpointing: enabled: true diff --git a/nemo_reinforcer/algorithms/loss_functions.py b/nemo_reinforcer/algorithms/loss_functions.py index 8504dac007..6a5d6b593d 100644 --- a/nemo_reinforcer/algorithms/loss_functions.py +++ b/nemo_reinforcer/algorithms/loss_functions.py @@ -25,7 +25,8 @@ class ClippedPGLossConfig(TypedDict): reference_policy_kl_penalty: float - ratio_eps: float + ratio_eps_min: float + ratio_eps_max: float class ClippedPGLossDataDict(TypedDict): @@ -57,6 +58,10 @@ class ClippedPGLossFn(LossFunction): - r_t(θ) = π_θ(a_t|s_t) / π_θ_old(a_t|s_t) is the probability ratio - A_t is the advantage estimate - ε is the clip parameter (ratio_eps) + - As proposed in the DAPO paper (https://arxiv.org/pdf/2503.14476), + we allow setting a distinct minimum and maximum value for the clip parameter (set to the same value for PPO/GRPO/etc.) + - ratio_eps_min: minimum value for the clip parameter + - ratio_eps_max: maximum value for the clip parameter - β is the KL penalty coefficient (reference_policy_kl_penalty) - KL(π_θ || π_ref) is the KL divergence between the current policy and reference policy (Schulman Approx.) @@ -65,7 +70,8 @@ class ClippedPGLossFn(LossFunction): """ def __init__(self, cfg: ClippedPGLossConfig): - self.ratio_eps = cfg["ratio_eps"] + self.ratio_eps_min = cfg["ratio_eps_min"] + self.ratio_eps_max = cfg["ratio_eps_max"] self.reference_policy_kl_penalty = cfg["reference_policy_kl_penalty"] self.disable_ppo_ratio = cfg.get("disable_ppo_ratio", False) @@ -108,7 +114,9 @@ def __call__( # Calculate clipped loss function if ppo ratio is enabled. 
if not self.disable_ppo_ratio: ratios = (curr_logprobs - prev_logprobs).exp() - ratios_clamped = ratios.clamp(1.0 - self.ratio_eps, 1.0 + self.ratio_eps) + ratios_clamped = ratios.clamp( + 1.0 - self.ratio_eps_min, 1.0 + self.ratio_eps_max + ) else: ratios = curr_logprobs ratios_clamped = curr_logprobs From a7b2e2bd78088596f9794b4281f95bed12794e43 Mon Sep 17 00:00:00 2001 From: Zhaocheng Zhu Date: Mon, 31 Mar 2025 14:55:45 -0700 Subject: [PATCH 20/27] fix: change format messages to out of place (#77) Signed-off-by: KiddoZhu Signed-off-by: Sahil Jain Co-authored-by: Parth Chadha Co-authored-by: Sahil Jain <48468750+SahilJain314@users.noreply.github.com> Signed-off-by: Yi-Fu Wu --- nemo_reinforcer/algorithms/loss_functions.py | 2 +- nemo_reinforcer/data/llm_message_utils.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/nemo_reinforcer/algorithms/loss_functions.py b/nemo_reinforcer/algorithms/loss_functions.py index 6a5d6b593d..158c9824eb 100644 --- a/nemo_reinforcer/algorithms/loss_functions.py +++ b/nemo_reinforcer/algorithms/loss_functions.py @@ -58,7 +58,7 @@ class ClippedPGLossFn(LossFunction): - r_t(θ) = π_θ(a_t|s_t) / π_θ_old(a_t|s_t) is the probability ratio - A_t is the advantage estimate - ε is the clip parameter (ratio_eps) - - As proposed in the DAPO paper (https://arxiv.org/pdf/2503.14476), + - As proposed in the DAPO paper (https://arxiv.org/pdf/2503.14476), we allow setting a distinct minimum and maximum value for the clip parameter (set to the same value for PPO/GRPO/etc.) - ratio_eps_min: minimum value for the clip parameter - ratio_eps_max: maximum value for the clip parameter diff --git a/nemo_reinforcer/data/llm_message_utils.py b/nemo_reinforcer/data/llm_message_utils.py index 5ae8bee9a8..f6cb3c8079 100644 --- a/nemo_reinforcer/data/llm_message_utils.py +++ b/nemo_reinforcer/data/llm_message_utils.py @@ -353,14 +353,13 @@ def get_formatted_message_log( Returns: The message log with updated 'token_ids' and 'content' fields. 
""" - cu_message = [] + new_message_log = [] prev_formatted_message = "" template = task_data_spec.custom_template for i, message in enumerate(message_log): - cu_message.append(message.copy()) formatted_message = tokenizer.apply_chat_template( - cu_message, + message_log[: i + 1], chat_template=template, add_generation_prompt=False, tokenize=False, @@ -383,13 +382,17 @@ def get_formatted_message_log( message_chunk = message_chunk.rstrip("\n") if not message_chunk.endswith(tokenizer.eos_token): message_chunk += tokenizer.eos_token - message["token_ids"] = tokenizer( + + new_message = message.copy() + new_message["token_ids"] = tokenizer( message_chunk, return_tensors="pt", add_special_tokens=False )["input_ids"][0] - message["content"] = message_chunk + new_message["content"] = message_chunk + new_message_log.append(new_message) + prev_formatted_message = formatted_message - return message_log + return new_message_log def remap_dataset_keys( From b17069c70a2f7419de75e5d4aed17185b2476424 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Mon, 31 Mar 2025 19:03:54 -0700 Subject: [PATCH 21/27] fix: correct version and use setuptools.dynamic metadata for version/readme (#104) Signed-off-by: Terry Kong Signed-off-by: Yi-Fu Wu --- nemo_reinforcer/package_info.py | 4 ++-- pyproject.toml | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/nemo_reinforcer/package_info.py b/nemo_reinforcer/package_info.py index ea2977b049..8ca9d4a97b 100644 --- a/nemo_reinforcer/package_info.py +++ b/nemo_reinforcer/package_info.py @@ -14,9 +14,9 @@ MAJOR = 0 -MINOR = 1 +MINOR = 2 PATCH = 0 -PRE_RELEASE = "" +PRE_RELEASE = "dev" # Use the following formatting: (major, minor, patch, pre-release) VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE) diff --git a/pyproject.toml b/pyproject.toml index eddc57a15b..e1e538304e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,9 +4,11 @@ build-backend = "setuptools.build_meta" [project] name = "nemo-reinforcer" -version = "0.0.1" +dynamic = [ + "version", + "readme", +] description = "Nemo-Reinforcer: A Scalable and Efficient Post-Training Library for Models Ranging from 1 GPU to 1000s, and from Tiny to >100B Parameters" -readme = "README.md" requires-python = ">=3.10" license = {text = "Apache 2.0"} dependencies = [ @@ -29,6 +31,10 @@ dependencies = [ [tool.setuptools] packages = ["nemo_reinforcer"] +[tool.setuptools.dynamic] +version = {attr = "nemo_reinforcer.__version__"} # any module attribute compatible with ast.literal_eval +readme = {file = "README.md", content-type = "text/markdown"} + [project.optional-dependencies] build = [ "torch==2.6.0", From 6f2c31e7cb653e90c944744c70aac9be9e56ce47 Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Tue, 1 Apr 2025 07:01:24 -0700 Subject: [PATCH 22/27] =?UTF-8?q?fix:=20remove=20usage=20of=20vllm=20to=20?= =?UTF-8?q?get=20device=20uuid=20and=20instead=20use=20nvidia-m=E2=80=A6?= =?UTF-8?q?=20(#105)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Parth Chadha Signed-off-by: Yi-Fu Wu --- nemo_reinforcer/models/generation/vllm.py | 4 +- .../models/generation/vllm_backend.py | 5 +- nemo_reinforcer/models/policy/hf_policy.py | 17 +++-- nemo_reinforcer/utils/nvml.py | 66 +++++++++++++++++++ pyproject.toml | 1 + tests/unit/utils/test_pynvml.py | 61 +++++++++++++++++ 6 files changed, 143 insertions(+), 11 deletions(-) create mode 100644 nemo_reinforcer/utils/nvml.py create mode 100644 tests/unit/utils/test_pynvml.py diff --git 
a/nemo_reinforcer/models/generation/vllm.py b/nemo_reinforcer/models/generation/vllm.py index 2395b65e34..edef3c69ae 100644 --- a/nemo_reinforcer/models/generation/vllm.py +++ b/nemo_reinforcer/models/generation/vllm.py @@ -97,6 +97,7 @@ def configure_worker( # Force vllm to use v0 runtime (will be enabled by default in #51) env_vars["VLLM_USE_V1"] = "0" + return resources, env_vars, init_kwargs def __init__( @@ -379,9 +380,6 @@ def shutdown(self): return False def report_device_id(self) -> str: - # from vllm.platforms import current_platform - # self.device_uuid = current_platform.get_device_uuid(self.rank) - # return self.device_uuid return self.llm.collective_rpc("report_device_id", args=tuple())[0] def update_weights_from_ipc_handles(self, ipc_handles): diff --git a/nemo_reinforcer/models/generation/vllm_backend.py b/nemo_reinforcer/models/generation/vllm_backend.py index a8ded9b519..3507d7dc55 100644 --- a/nemo_reinforcer/models/generation/vllm_backend.py +++ b/nemo_reinforcer/models/generation/vllm_backend.py @@ -25,10 +25,9 @@ class UpdatableVllmInternalWorker(Worker): def report_device_id(self) -> str: - from vllm.platforms import current_platform + from nemo_reinforcer.utils.nvml import get_device_uuid - self.device_uuid = current_platform.get_device_uuid(self.device.index) - return self.device_uuid + return get_device_uuid(self.device.index) def update_weights_from_ipc_handles(self, ipc_handles): """Update weights from IPC handles. diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py index bc5d0c4a5d..8d0f67f230 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_reinforcer/models/policy/hf_policy.py @@ -696,10 +696,17 @@ def zero_out_weights(self): torch.cuda.synchronize() def report_device_id(self) -> str: - from vllm.platforms import current_platform + """Report the UUID of the current CUDA device using NVML. - self.device_uuid = current_platform.get_device_uuid(torch.cuda.current_device()) - return self.device_uuid + Returns: + str: UUID of the device in the format "GPU-xxxxx" + """ + from nemo_reinforcer.utils.nvml import get_device_uuid + + # Get current device index from torch + device_idx = torch.cuda.current_device() + # Get device UUID using NVML + return get_device_uuid(device_idx) @torch.no_grad() def get_weight_ipc_handles(self, offload_model=True): @@ -718,7 +725,7 @@ def get_weight_ipc_handles(self, offload_model=True): params = dtype_params self._held_reference_model_params = params data = {} - self.device_uuid = self.report_device_id() + device_uuid = self.report_device_id() for name, p in params.items(): data[name] = reduce_tensor(p.detach()) @@ -726,7 +733,7 @@ def get_weight_ipc_handles(self, offload_model=True): self.model = self.manual_offload_to_cpu(self.model) gc.collect() torch.cuda.empty_cache() - return {self.device_uuid: data} + return {device_uuid: data} def prepare_for_lp_inference(self): self.model = self.manual_load_to_gpu(self.model) diff --git a/nemo_reinforcer/utils/nvml.py b/nemo_reinforcer/utils/nvml.py new file mode 100644 index 0000000000..2f684effef --- /dev/null +++ b/nemo_reinforcer/utils/nvml.py @@ -0,0 +1,66 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import contextlib
+import os
+import pynvml
+
+
+@contextlib.contextmanager
+def nvml_context():
+    """Context manager for NVML initialization and shutdown.
+
+    Raises:
+        RuntimeError: If NVML initialization or a subsequent NVML call fails.
+    """
+    try:
+        pynvml.nvmlInit()
+        yield
+    except pynvml.NVMLError as e:
+        raise RuntimeError(f"NVML error: {e}") from e
+    finally:
+        try:
+            pynvml.nvmlShutdown()
+        except pynvml.NVMLError:
+            pass
+
+
+def device_id_to_physical_device_id(device_id: int) -> int:
+    """Convert a logical device ID to a physical device ID considering CUDA_VISIBLE_DEVICES."""
+    if "CUDA_VISIBLE_DEVICES" in os.environ:
+        device_ids = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
+        try:
+            physical_device_id = int(device_ids[device_id])
+            return physical_device_id
+        except (ValueError, IndexError):
+            raise RuntimeError(
+                f"Failed to convert logical device ID {device_id} to physical device ID. Available devices are: {device_ids}."
+            )
+    else:
+        return device_id
+
+
+def get_device_uuid(device_idx: int) -> str:
+    """Get the UUID of a CUDA device using NVML."""
+    # Convert logical device index to physical device index
+    global_device_idx = device_id_to_physical_device_id(device_idx)
+
+    # Get the device handle and UUID
+    with nvml_context():
+        try:
+            handle = pynvml.nvmlDeviceGetHandleByIndex(global_device_idx)
+            return pynvml.nvmlDeviceGetUUID(handle)
+        except pynvml.NVMLError as e:
+            raise RuntimeError(
+                f"Failed to get device UUID for device {device_idx} (global index: {global_device_idx}): {e}"
+            )
diff --git a/pyproject.toml b/pyproject.toml
index e1e538304e..8397211ce6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,7 @@ dependencies = [
     "omegaconf",
     "torchdata",
     "vllm==0.8.0",
+    "nvidia-ml-py",
 ]
 
 [tool.setuptools]
diff --git a/tests/unit/utils/test_pynvml.py b/tests/unit/utils/test_pynvml.py
new file mode 100644
index 0000000000..cf7044654f
--- /dev/null
+++ b/tests/unit/utils/test_pynvml.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os +from unittest.mock import patch + +from nemo_reinforcer.utils.nvml import ( + nvml_context, + device_id_to_physical_device_id, + get_device_uuid, +) + + +@patch("nemo_reinforcer.utils.nvml.pynvml") +def test_nvml_context(mock_pynvml): + """Test that nvml_context initializes and shuts down NVML.""" + with nvml_context(): + pass + + # Verify init and shutdown were called + mock_pynvml.nvmlInit.assert_called_once() + mock_pynvml.nvmlShutdown.assert_called_once() + + +def test_device_id_conversion(): + """Test device ID conversion with and without CUDA_VISIBLE_DEVICES.""" + with patch.dict(os.environ, {}, clear=True): + assert device_id_to_physical_device_id(0) == 0 + + with patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "2,3"}): + assert device_id_to_physical_device_id(0) == 2 + assert device_id_to_physical_device_id(1) == 3 + + +@patch("nemo_reinforcer.utils.nvml.device_id_to_physical_device_id") +@patch("nemo_reinforcer.utils.nvml.pynvml") +def test_get_device_uuid(mock_pynvml, mock_convert_id): + """Test that get_device_uuid correctly retrieves a UUID.""" + + # Setup + mock_convert_id.return_value = 1 + mock_handle = mock_pynvml.nvmlDeviceGetHandleByIndex.return_value + mock_pynvml.nvmlDeviceGetUUID.return_value = b"GPU-12345" + + # Call function + uuid = get_device_uuid(0) + + # Verify + assert uuid == b"GPU-12345" + mock_convert_id.assert_called_once_with(0) + mock_pynvml.nvmlDeviceGetHandleByIndex.assert_called_once_with(1) From 4e20786b70e8730f974de1af2f621ee03c7d4e21 Mon Sep 17 00:00:00 2001 From: Hemil Desai Date: Tue, 1 Apr 2025 11:21:35 -0700 Subject: [PATCH 23/27] fix: Change optional-dependencies to dependency-groups (#81) Signed-off-by: Hemil Desai Signed-off-by: Yi-Fu Wu --- .github/workflows/cicd-main.yml | 20 ++++++++++---------- docker/Dockerfile | 2 +- docs/documentation.md | 6 +++--- pyproject.toml | 6 +++--- tests/run_unit_in_docker.sh | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 7902c02146..4f20b77cda 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -16,8 +16,8 @@ name: "CICD Reinforcer" on: pull_request: branches: - - 'main' - - 'r**' + - "main" + - "r**" types: [labeled] merge_group: types: [checks_requested] @@ -81,14 +81,14 @@ jobs: # Some output that's helpful for debugging echo "Docs changed: $CHANGED_DOCS" echo "Src changed: $CHANGED_SRC" - + # echo "DOCS_ONLY: $DOCS_ONLY" echo "LABEL: $LABEL" echo "IS_PULLREQUEST: $IS_PULLREQUEST" - + # Run CI only (on main or if label is attached) and if it's not only docs echo run_ci=$([[ ("$LABEL" = "true" || "$IS_PULLREQUEST" = "false" || "$MERGE_GROUP" = "true") && "$DOCS_ONLY" = "false" ]] && echo "true" || echo "false") | tee -a "$GITHUB_OUTPUT" - + lint-check: name: Lint check needs: [pre-flight] @@ -102,7 +102,7 @@ jobs: pip install pre-commit pre-commit install pre-commit run --all-files --show-diff-on-failure --color=always - + sphinx-build: name: Sphinx build needs: [pre-flight] @@ -115,7 +115,7 @@ jobs: run: | pip install uv cd docs/ - uv run --extra docs sphinx-build . _build/html + uv run --group docs sphinx-build . _build/html build-container: if: ${{ needs.pre-flight.outputs.run_ci == 'true' }} @@ -140,7 +140,7 @@ jobs: TIMEOUT: 10 SCRIPT: | cd ${REINFORCER_REPO_DIR}/docs - uv run --extra docs sphinx-build -b doctest . _build/doctest + uv run --group docs sphinx-build -b doctest . 
_build/doctest secrets: HF_TOKEN: ${{ secrets.HF_TOKEN }} @@ -154,7 +154,7 @@ jobs: TIMEOUT: 15 SCRIPT: | cd ${REINFORCER_REPO_DIR} - uv run --extra test bash -x ./tests/run_unit.sh + uv run --group test bash -x ./tests/run_unit.sh FINAL_SCRIPT_EXTERNAL: | cat < Date: Tue, 1 Apr 2025 12:06:51 -0700 Subject: [PATCH 24/27] feat: Add support for hydra style overrides (#80) Signed-off-by: Hemil Desai Signed-off-by: Yi-Fu Wu --- examples/run_grpo_math.py | 26 +++++++-------- nemo_reinforcer/utils/config.py | 31 ++++++++++++++++++ pyproject.toml | 1 + tests/unit/utils/test_config.py | 58 +++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 15 deletions(-) diff --git a/examples/run_grpo_math.py b/examples/run_grpo_math.py index b87c7037b3..c67829e80e 100644 --- a/examples/run_grpo_math.py +++ b/examples/run_grpo_math.py @@ -15,24 +15,23 @@ import argparse import os import pprint - -from omegaconf import OmegaConf -from typing import Dict, Any +from collections import defaultdict +from typing import Any, Dict from datasets import load_dataset +from omegaconf import OmegaConf from transformers import AutoTokenizer -from collections import defaultdict from nemo_reinforcer.algorithms.grpo import MasterConfig, grpo_train, setup -from nemo_reinforcer.distributed.virtual_cluster import init_ray -from nemo_reinforcer.utils.config import load_config -from nemo_reinforcer.utils.logger import get_next_experiment_dir -from nemo_reinforcer.data.interfaces import TaskDataSpec, DatumSpec, LLMMessageLogType from nemo_reinforcer.data import DataConfig -from nemo_reinforcer.models.policy import PolicyConfig from nemo_reinforcer.data.datasets import AllTaskProcessedDataset, rl_collate_fn -from nemo_reinforcer.environments.math_environment import MathEnvironment from nemo_reinforcer.data.hf_datasets.openmathinstruct2 import OpenMathInstruct2Dataset +from nemo_reinforcer.data.interfaces import DatumSpec, LLMMessageLogType, TaskDataSpec +from nemo_reinforcer.distributed.virtual_cluster import init_ray +from nemo_reinforcer.environments.math_environment import MathEnvironment +from nemo_reinforcer.models.policy import PolicyConfig +from nemo_reinforcer.utils.config import load_config, parse_hydra_overrides +from nemo_reinforcer.utils.logger import get_next_experiment_dir def parse_args(): @@ -43,10 +42,7 @@ def parse_args(): ) # Parse known args for the script - args, remaining = parser.parse_known_args() - - # Convert remaining args to OmegaConf format - overrides = OmegaConf.from_dotlist(remaining) + args, overrides = parser.parse_known_args() return args, overrides @@ -242,7 +238,7 @@ def main(): if overrides: print(f"Overrides: {overrides}") - config = OmegaConf.merge(config, overrides) + config = parse_hydra_overrides(config, overrides) config: MasterConfig = OmegaConf.to_container(config, resolve=True) print("Applied CLI overrides") diff --git a/nemo_reinforcer/utils/config.py b/nemo_reinforcer/utils/config.py index 51418e3b1b..82c4be462b 100644 --- a/nemo_reinforcer/utils/config.py +++ b/nemo_reinforcer/utils/config.py @@ -15,6 +15,8 @@ from pathlib import Path from typing import Optional, Union +from hydra._internal.config_loader_impl import ConfigLoaderImpl +from hydra.core.override_parser.overrides_parser import OverridesParser from omegaconf import DictConfig, ListConfig, OmegaConf @@ -130,3 +132,32 @@ def load_config(config_path: Union[str, Path]) -> DictConfig: Merged config dictionary """ return load_config_with_inheritance(config_path) + + +class OverridesError(Exception): + """Custom 
exception for Hydra override parsing errors.""" + + pass + + +def parse_hydra_overrides(cfg: DictConfig, overrides: list[str]) -> DictConfig: + """Parse and apply Hydra overrides to an OmegaConf config. + + Args: + cfg: OmegaConf config to apply overrides to + overrides: List of Hydra override strings + + Returns: + Updated config with overrides applied + + Raises: + OverridesError: If there's an error parsing or applying overrides + """ + try: + OmegaConf.set_struct(cfg, True) + parser = OverridesParser.create() + parsed = parser.parse_overrides(overrides=overrides) + ConfigLoaderImpl._apply_overrides_to_config(overrides=parsed, cfg=cfg) + return cfg + except Exception as e: + raise OverridesError(f"Failed to parse Hydra overrides: {str(e)}") from e diff --git a/pyproject.toml b/pyproject.toml index 752f25047d..5f72e283f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "torchdata", "vllm==0.8.0", "nvidia-ml-py", + "hydra-core", ] [tool.setuptools] diff --git a/tests/unit/utils/test_config.py b/tests/unit/utils/test_config.py index 245d9e0053..b8aa5328d2 100644 --- a/tests/unit/utils/test_config.py +++ b/tests/unit/utils/test_config.py @@ -196,3 +196,61 @@ def test_interpolation(temp_config_dir): config = load_config(child_path) assert config.base_value == 43 assert config.derived.value == 43 # Interpolation uses child's base_value + + +def test_parse_hydra_overrides(): + """Test parsing and applying Hydra overrides.""" + from omegaconf import OmegaConf + + from nemo_reinforcer.utils.config import OverridesError, parse_hydra_overrides + + # Create initial config + cfg = OmegaConf.create( + { + "model": {"type": "default", "hidden_size": 768}, + "training": {"batch_size": 32, "learning_rate": 1e-4}, + } + ) + + # Test basic override + overrides = ["model.type=transformer"] + updated_cfg = parse_hydra_overrides(cfg, overrides) + assert updated_cfg.model.type == "transformer" + assert updated_cfg.model.hidden_size == 768 # Unchanged + + # Test nested override + overrides = ["model.hidden_size=1024"] + updated_cfg = parse_hydra_overrides(cfg, overrides) + assert updated_cfg.model.hidden_size == 1024 + + # Test multiple overrides + overrides = ["training.batch_size=64", "training.learning_rate=2e-4"] + updated_cfg = parse_hydra_overrides(cfg, overrides) + assert updated_cfg.training.batch_size == 64 + assert updated_cfg.training.learning_rate == 2e-4 + + # Test invalid override + overrides = ["nonexistent.key=value"] + with pytest.raises(OverridesError): + parse_hydra_overrides(cfg, overrides) + + # Test invalid syntax + overrides = ["invalid.syntax"] + with pytest.raises(OverridesError): + parse_hydra_overrides(cfg, overrides) + + # Test empty overrides + overrides = [] + updated_cfg = parse_hydra_overrides(cfg, overrides) + assert updated_cfg == cfg # Config should be unchanged + + # Test override additions and deletions + overrides = [ + "+model.num_layers=12", + "++model.type=transformer", + "~training.batch_size", + ] + updated_cfg = parse_hydra_overrides(cfg, overrides) + assert updated_cfg.model.num_layers == 12 + assert updated_cfg.model.type == "transformer" + assert "batch_size" not in updated_cfg.training From f87474667e140f653435d66620aac37312e3b517 Mon Sep 17 00:00:00 2001 From: Anna Shors Date: Tue, 1 Apr 2025 12:44:29 -0700 Subject: [PATCH 25/27] fix: Do not initialize reference model for sft (#71) Signed-off-by: ashors1 Signed-off-by: Anna Shors Co-authored-by: Terry Kong --- nemo_reinforcer/algorithms/sft.py | 1 + 
nemo_reinforcer/models/policy/hf_policy.py | 26 +++++++++++-------- .../unit/models/policy/test_hf_ray_policy.py | 10 ++++--- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/nemo_reinforcer/algorithms/sft.py b/nemo_reinforcer/algorithms/sft.py index 0d06ad6366..5ff77e11e9 100644 --- a/nemo_reinforcer/algorithms/sft.py +++ b/nemo_reinforcer/algorithms/sft.py @@ -182,6 +182,7 @@ def setup( if last_checkpoint_path else None, init_optimizer=True, + init_reference_model=False, ) loss_fn = NLLLoss() print(f" ✓ Model initialized") diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py index 8d0f67f230..9c80fbbc8b 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_reinforcer/models/policy/hf_policy.py @@ -69,6 +69,7 @@ def __init__( weights_path: Optional[str] = None, optimizer_path: Optional[str] = None, init_optimizer: bool = True, + init_reference_model: bool = True, ): self.cfg = config # torch distributed init. Envars for rank, world_size, and master_addr and master_port are set from the ray remote call @@ -89,12 +90,14 @@ def __init__( device_map="cpu", # load weights onto CPU initially torch_dtype=torch.float32, # use full precision in sft until https://github.com/NVIDIA/reinforcer/issues/13 is fixed ) - self.reference_model = AutoModelForCausalLM.from_pretrained( - model_name, - device_map="cpu", # load weights onto CPU initially - torch_dtype=torch.float32, # use full precision in sft until https://github.com/NVIDIA/reinforcer/issues/13 is fixed - ) - + if init_reference_model: + self.reference_model = AutoModelForCausalLM.from_pretrained( + model_name, + device_map="cpu", # load weights onto CPU initially + torch_dtype=torch.float32, # use full precision in sft until https://github.com/NVIDIA/reinforcer/issues/13 is fixed + ) + else: + self.reference_model = None self.tokenizer = AutoTokenizer.from_pretrained(model_name) # If no pad token is defined, you might need: if self.tokenizer.pad_token is None: @@ -131,11 +134,10 @@ def do_fsdp(model): self.model.to("cuda") self.model = do_fsdp(self.model) self.model = self.manual_offload_to_cpu(self.model) - - self.reference_model.to("cuda") - self.reference_model = do_fsdp(self.reference_model) - self.reference_model = self.manual_offload_to_cpu(self.reference_model) - + if self.reference_model is not None: + self.reference_model.to("cuda") + self.reference_model = do_fsdp(self.reference_model) + self.reference_model = self.manual_offload_to_cpu(self.reference_model) self.model = self.manual_load_to_gpu(self.model) self._held_reference_model_params = None # register_fsdp_forward_method(self.model, "generate") @@ -925,6 +927,7 @@ def __init__( init_optimizer: bool = True, weights_path: Optional[str] = None, optimizer_path: Optional[str] = None, + init_reference_model: bool = True, ): if weights_path: weights_path = os.path.abspath(weights_path) @@ -937,6 +940,7 @@ def __init__( init_optimizer=init_optimizer, weights_path=weights_path, optimizer_path=optimizer_path, + init_reference_model=init_reference_model, ) additional_env_vars = {} if config["expandable_segments_enabled"]: diff --git a/tests/unit/models/policy/test_hf_ray_policy.py b/tests/unit/models/policy/test_hf_ray_policy.py index 29d8cbcbee..ded244feac 100644 --- a/tests/unit/models/policy/test_hf_ray_policy.py +++ b/tests/unit/models/policy/test_hf_ray_policy.py @@ -201,7 +201,7 @@ def training_setup(): config = basic_llama_test_config print("Creating training HfPolicy...") - policy = 
HfPolicy(cluster=cluster, config=config, init_reference_model=False)
 
     # Create a test batch
     print("Creating test batch...")
@@ -278,7 +278,7 @@ def verify_loss_tensor(loss_tensor):
 
 
 @pytest.fixture
-def generation_setup():
+def generation_setup(request):
     """Setup and teardown specifically for generation tests."""
     policy = None
     cluster = None
@@ -300,7 +300,9 @@ def generation_setup():
         config = basic_llama_test_config
 
         print("Creating generation HfPolicy...")
-        policy = HfPolicy(cluster=cluster, config=config)
+        policy = HfPolicy(
+            cluster=cluster, config=config, init_reference_model=request.param
+        )
 
         # Create a test batch
         print("Creating test batch...")
@@ -364,6 +366,7 @@ def generation_setup():
 
 
 @pytest.mark.timeout(180)
+@pytest.mark.parametrize("generation_setup", [False], indirect=True)
 def test_hf_policy_generation(generation_setup, tracker):
     policy, cluster, data, tokenizer, prompts, expected_generations = generation_setup
 
@@ -450,6 +453,7 @@ def test_hf_policy_generation(generation_setup, tracker):
 
 
 @pytest.mark.timeout(180)
+@pytest.mark.parametrize("generation_setup", [True], indirect=True)
 def test_all_hf_policy_generation_lps_ref_training(generation_setup):
     policy, cluster, data, tokenizer, prompts, expected_generations = generation_setup
 

From 0c0aa6dc36448e58ef5cfbd74851704018eb0a4f Mon Sep 17 00:00:00 2001
From: Parth Chadha
Date: Tue, 1 Apr 2025 15:39:43 -0700
Subject: [PATCH 26/27] =?UTF-8?q?fix:=20change=20grpo=20default=20to=20use?=
 =?UTF-8?q?=2064=20prompts=20per=20step=20and=2032=20generation=E2=80=A6?=
 =?UTF-8?q?=20(#111)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Parth Chadha
Signed-off-by: Yi-Fu Wu
---
 examples/configs/grpo_math_1B.yaml | 2 +-
 examples/configs/grpo_math_8B.yaml | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml
index a7cb644114..c79c9bde5b 100644
--- a/examples/configs/grpo_math_1B.yaml
+++ b/examples/configs/grpo_math_1B.yaml
@@ -27,7 +27,7 @@ policy:
   model_name: "meta-llama/Llama-3.2-1B-Instruct"
   train_global_batch_size: 512
   train_micro_batch_size: 4
-  generation_batch_size: 32
+  generation_batch_size: 32 # Only used when generating with the HF backend
   logprob_batch_size: 4
   max_total_sequence_length: 512
   precision: "bfloat16"
diff --git a/examples/configs/grpo_math_8B.yaml b/examples/configs/grpo_math_8B.yaml
index 3408e84e32..06005425cb 100644
--- a/examples/configs/grpo_math_8B.yaml
+++ b/examples/configs/grpo_math_8B.yaml
@@ -1,11 +1,15 @@
 # GRPO Algorithm Configuration
 defaults: "grpo_math_1B.yaml"
 
+grpo:
+  num_prompts_per_step: 64
+  num_generations_per_prompt: 32
+
 policy:
   model_name: "meta-llama/Llama-3.1-8B-Instruct"
   train_global_batch_size: 512
   train_micro_batch_size: 1
-  generation_batch_size: 32
+  generation_batch_size: 32 # Only used when generating with the HF backend
   logprob_batch_size: 2
   max_total_sequence_length: 4096
   precision: "bfloat16"

From 97c5e1b73594c18202f82b7ab3eae69a8bb35abf Mon Sep 17 00:00:00 2001
From: Parth Chadha
Date: Tue, 1 Apr 2025 15:58:53 -0700
Subject: [PATCH 27/27] feat: use cuda_graph by default for vllm (#116)

Signed-off-by: Parth Chadha
Signed-off-by: Yi-Fu Wu
---
 nemo_reinforcer/models/generation/vllm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nemo_reinforcer/models/generation/vllm.py b/nemo_reinforcer/models/generation/vllm.py
index edef3c69ae..e22110bf55 100644
---
a/nemo_reinforcer/models/generation/vllm.py
+++ b/nemo_reinforcer/models/generation/vllm.py
@@ -178,7 +178,8 @@ def __init__(
             gpu_memory_utilization=self.cfg["vllm_cfg"]["gpu_memory_utilization"],
             enable_prefix_caching=True,
             dtype="auto",
-            enforce_eager=True,
+            # Use CUDA graphs by default for performance; set enforce_eager=True to fall back to eager execution.
+            enforce_eager=False,
             max_model_len=self.cfg["vllm_cfg"]["max_model_len"],
             trust_remote_code=True,
             worker_cls=UpdatableVllmInternalWorker,
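For context on the `enforce_eager` flag flipped above: with `enforce_eager=False`, vLLM captures CUDA graphs for the decode path during engine startup, trading extra GPU memory and a slower start for faster generation; `enforce_eager=True` skips graph capture and is mainly useful for debugging. A minimal sketch of constructing an engine directly with this flag (model name and sizes are illustrative, not taken from this repo's configs):

```python
from vllm import LLM, SamplingParams

# enforce_eager=False mirrors the new default above: capture CUDA graphs for
# faster decoding at the cost of extra GPU memory and longer startup.
llm = LLM(
    model="meta-llama/Llama-3.2-1B-Instruct",
    enforce_eager=False,  # set True to debug with eager execution
    gpu_memory_utilization=0.6,
    max_model_len=512,
)

outputs = llm.generate(
    ["Briefly explain CUDA graphs:"], SamplingParams(max_tokens=64)
)
print(outputs[0].outputs[0].text)
```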