Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions nemo_reinforcer/models/generation/vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import ray
import torch
from transformers import AutoTokenizer

from nemo_reinforcer.models.generation.interfaces import (
GenerationInterface,
Expand Down Expand Up @@ -109,8 +108,7 @@ def configure_worker(
env_vars["RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES"] = "1"
init_kwargs["fraction_of_gpus"] = num_gpus

# Force vllm to use v0 runtime (will be enabled by default in #51)
env_vars["VLLM_USE_V1"] = "0"
env_vars["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"

return resources, env_vars, init_kwargs

Expand Down Expand Up @@ -150,12 +148,9 @@ def __init__(
self.world_size = 1

try:
from vllm import LLM, SamplingParams
from nemo_reinforcer.models.generation.vllm_backend import (
UpdatableVllmInternalWorker,
)
import vllm
Comment thread
parthchadha marked this conversation as resolved.

self.SamplingParams = SamplingParams
self.SamplingParams = vllm.SamplingParams
except ImportError:
raise ImportError(
"vLLM is not installed. Please install it with `pip install nemo-reinforcer[vllm]` "
Expand Down Expand Up @@ -184,7 +179,7 @@ def __init__(
# For non-TP mode, explicitly set executor to None to avoid Ray issues
vllm_kwargs["distributed_executor_backend"] = None

self.llm = LLM(
self.llm = vllm.LLM(
model=self.model_name,
# Training pipeline will set this to "dummy" and eval will load real weights using 'auto'
load_format=self.cfg["vllm_cfg"]["load_format"],
Expand All @@ -198,7 +193,7 @@ def __init__(
enforce_eager=True,
max_model_len=self.cfg["vllm_cfg"]["max_model_len"],
trust_remote_code=True,
worker_cls=UpdatableVllmInternalWorker,
worker_extension_cls="nemo_reinforcer.models.generation.vllm_backend.VllmInternalWorkerExtension",
enable_sleep_mode=True,
disable_log_stats=True,
**vllm_kwargs,
Expand Down
6 changes: 3 additions & 3 deletions nemo_reinforcer/models/generation/vllm_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import torch

try:
from vllm.worker.worker import Worker
import vllm
except ImportError:
raise ImportError(
"vLLM is not installed. Please install it with `pip install nemo-reinforcer[vllm]` "
Expand All @@ -23,7 +23,7 @@
)


class UpdatableVllmInternalWorker(Worker):
class VllmInternalWorkerExtension:
def report_device_id(self) -> str:
from nemo_reinforcer.utils.nvml import get_device_uuid

Expand Down Expand Up @@ -60,6 +60,6 @@ def update_weights_from_ipc_handles(self, ipc_handles):
return True
except Exception as e:
print(
f"Error in UpdatableVllmInternalWorker.update_weights_from_ipc_handles: {e}"
f"Error in VllmInternalWorkerExtension.update_weights_from_ipc_handles: {e}"
)
return False
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ readme = {file = "README.md", content-type = "text/markdown"}

[project.optional-dependencies]
vllm = [
"vllm==0.8.0",
"vllm==0.8.2",
]

[dependency-groups]
Expand Down Expand Up @@ -109,4 +109,4 @@ convention = "google"
# --link-mode=copy (slower but more reliable; suppresses warning)
# --link-mode=symlink (fastest option when uv cache and venv on different file-system; caveat: venv is brittle since it depends on the environment/container)
#
#link-mode = "symlink"
#link-mode = "symlink"
22 changes: 18 additions & 4 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.