diff --git a/nemo_reinforcer/models/generation/vllm.py b/nemo_reinforcer/models/generation/vllm.py
index 4e8ff364c7..f7138689e3 100644
--- a/nemo_reinforcer/models/generation/vllm.py
+++ b/nemo_reinforcer/models/generation/vllm.py
@@ -18,7 +18,6 @@
 
 import ray
 import torch
-from transformers import AutoTokenizer
 
 from nemo_reinforcer.models.generation.interfaces import (
     GenerationInterface,
@@ -109,8 +108,7 @@ def configure_worker(
             env_vars["RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES"] = "1"
             init_kwargs["fraction_of_gpus"] = num_gpus
 
-        # Force vllm to use v0 runtime (will be enabled by default in #51)
-        env_vars["VLLM_USE_V1"] = "0"
+        env_vars["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
 
         return resources, env_vars, init_kwargs
 
@@ -150,12 +148,9 @@ def __init__(
         self.world_size = 1
 
         try:
-            from vllm import LLM, SamplingParams
-            from nemo_reinforcer.models.generation.vllm_backend import (
-                UpdatableVllmInternalWorker,
-            )
+            import vllm
 
-            self.SamplingParams = SamplingParams
+            self.SamplingParams = vllm.SamplingParams
         except ImportError:
             raise ImportError(
                 "vLLM is not installed. Please install it with `pip install nemo-reinforcer[vllm]` "
@@ -184,7 +179,7 @@ def __init__(
             # For non-TP mode, explicitly set executor to None to avoid Ray issues
             vllm_kwargs["distributed_executor_backend"] = None
 
-        self.llm = LLM(
+        self.llm = vllm.LLM(
             model=self.model_name,
             # Training pipeline will set this to "dummy" and eval will load real weights using 'auto'
             load_format=self.cfg["vllm_cfg"]["load_format"],
@@ -198,7 +193,7 @@ def __init__(
             enforce_eager=True,
             max_model_len=self.cfg["vllm_cfg"]["max_model_len"],
             trust_remote_code=True,
-            worker_cls=UpdatableVllmInternalWorker,
+            worker_extension_cls="nemo_reinforcer.models.generation.vllm_backend.VllmInternalWorkerExtension",
             enable_sleep_mode=True,
             disable_log_stats=True,
             **vllm_kwargs,
diff --git a/nemo_reinforcer/models/generation/vllm_backend.py b/nemo_reinforcer/models/generation/vllm_backend.py
index 09e94f2815..a7fd12aa26 100644
--- a/nemo_reinforcer/models/generation/vllm_backend.py
+++ b/nemo_reinforcer/models/generation/vllm_backend.py
@@ -14,7 +14,7 @@
 import torch
 
 try:
-    from vllm.worker.worker import Worker
+    import vllm
 except ImportError:
     raise ImportError(
         "vLLM is not installed. Please install it with `pip install nemo-reinforcer[vllm]` "
@@ -23,7 +23,7 @@
     )
 
 
-class UpdatableVllmInternalWorker(Worker):
+class VllmInternalWorkerExtension:
     def report_device_id(self) -> str:
         from nemo_reinforcer.utils.nvml import get_device_uuid
 
@@ -60,6 +60,6 @@ def update_weights_from_ipc_handles(self, ipc_handles):
             return True
         except Exception as e:
             print(
-                f"Error in UpdatableVllmInternalWorker.update_weights_from_ipc_handles: {e}"
+                f"Error in VllmInternalWorkerExtension.update_weights_from_ipc_handles: {e}"
             )
             return False
diff --git a/pyproject.toml b/pyproject.toml
index febbf9c5f1..b7f8260ff8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,7 +38,7 @@ readme = {file = "README.md", content-type = "text/markdown"}
 
 [project.optional-dependencies]
 vllm = [
-    "vllm==0.8.0",
+    "vllm==0.8.2",
 ]
 
 [dependency-groups]
@@ -109,4 +109,4 @@ convention = "google"
-#  --link-mode=copy (slower but more reliable; supresses warning)
+#  --link-mode=copy (slower but more reliable; suppresses warning)
 #  --link-mode=symlink (fastest option when uv cache and venv on different file-system; caveat: venv is brittle since it depends on the environment/container)
 #
-#link-mode = "symlink"
\ No newline at end of file
+#link-mode = "symlink"
diff --git a/uv.lock b/uv.lock
index b5d6bbb4f0..d546f25e64 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1361,6 +1361,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/6d/eb/a5e8b06b924b4149cf498e1598116bad1e91ab23046c2dfc2c498154d393/latex2sympy2_extended-1.10.1-py3-none-any.whl", hash = "sha256:917a23e8f3b6edea88a56978fbbe87ed9fca4197f8277646be57b4660710347c", size = 207460 },
 ]
 
+[[package]]
+name = "llguidance"
+version = "0.7.11"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7c/4b/92f81aa9d98e2c0721e2760e0fa1ae1691380bd27f2bf530310671a777d9/llguidance-0.7.11.tar.gz", hash = "sha256:226409610f1d1e0ecd62f15d1dd47851879513eb1eb56129c56de8188b80fa8d", size = 384121 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c2/38/bb5e0e185f84e4702ca079b0874de88b0d1b7245c48fc6449b766bce6103/llguidance-0.7.11-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c1639466113196cf6d274461deaafbe6011b60d459f773ca97045df1ee87e195", size = 3065620 },
+    { url = "https://files.pythonhosted.org/packages/c3/c3/14f1173407a0ba18e1f57d26eae4da49d6336d5e0405336b9cbcb749848b/llguidance-0.7.11-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e6899df33f3372ec86d7c1939e33891fda9e9a533dcd7f7f8c556897446765b", size = 2957459 },
+    { url = "https://files.pythonhosted.org/packages/5f/07/6064f1253708c879c96ce0b74bacd7ab2845c0e8199ff13d84681a5041ad/llguidance-0.7.11-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32edcdc60922bdc97dcbae4d18e2d6dca451571959303ced7b7821dbbd344c0f", size = 13561497 },
+    { url = "https://files.pythonhosted.org/packages/e1/9e/96d96fab0c27adb9f51dabc42682d12dfe4602e7637a71614b916879ae7a/llguidance-0.7.11-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b167f7d4da85747378c0c58393cd078b459a90d6e8a60e676692784a78a6f61", size = 13687114 },
+    { url = "https://files.pythonhosted.org/packages/c3/72/f5ed95fd29faf6b197d6af543671306ef154741f804b197c3e3f7ad15a8b/llguidance-0.7.11-cp39-abi3-win_amd64.whl", hash = "sha256:585cb3b52a702303240ae91cc0633735dab3a1db2c062af8ffb4ef3ca4737236", size = 2611515 },
+]
+
 [[package]]
 name = "llvmlite"
 version = "0.43.0"
@@ -1823,7 +1836,7 @@ requires-dist = [
     { name = "torch", specifier = "==2.6.0" },
     { name = "torchdata" },
     { name = "transformers" },
-    { name = "vllm", marker = "extra == 'vllm'", specifier = "==0.8.0" },
+    { name = "vllm", marker = "extra == 'vllm'", specifier = "==0.8.2" },
     { name = "wandb" },
 ]
 provides-extras = ["vllm"]
@@ -4153,7 +4166,7 @@ wheels = [
 
 [[package]]
 name = "vllm"
-version = "0.8.0"
+version = "0.8.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "aiohttp" },
@@ -4168,6 +4181,7 @@ dependencies = [
     { name = "gguf" },
     { name = "importlib-metadata" },
     { name = "lark" },
+    { name = "llguidance", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
     { name = "lm-format-enforcer" },
     { name = "mistral-common", extra = ["opencv"] },
     { name = "msgspec" },
@@ -4205,9 +4219,9 @@ dependencies = [
     { name = "xformers", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
     { name = "xgrammar", marker = "platform_machine == 'aarch64' or platform_machine == 'x86_64'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/d2/27/73a54707964c5160067e253398cc328943e3ddbaa3099265ab593e6ec766/vllm-0.8.0.tar.gz", hash = "sha256:449e6651d30d6d5025d0d42499cf1a02d983915ef3b3670547db14a0431aa9bd", size = 6407594 }
+sdist = { url = "https://files.pythonhosted.org/packages/df/4d/6b27cc14d0c35e578a743a767953500a801ba296694b7e44cca709738b41/vllm-0.8.2.tar.gz", hash = "sha256:9b337b1c4072ccb94b1bf2b716593fadbe2dcb8d091f9bcbd6b5c6d37f9842ac", size = 6450146 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/15/77/7beca2061aadfdfd2d81411102e6445b459bcfedfc46671d4712de6a00fb/vllm-0.8.0-cp38-abi3-manylinux1_x86_64.whl", hash = "sha256:d3660eda448560b0ce6a1524466d7d36ec0024e772c9dbf562dbead980e7d480", size = 265290109 },
+    { url = "https://files.pythonhosted.org/packages/57/49/207364110b96d76139a4e80617e5831d46884abe824941b15c8a748ca5e0/vllm-0.8.2-cp38-abi3-manylinux1_x86_64.whl", hash = "sha256:32442b686c5dad8e6ddcf5a8b0cf3f741359fed6a9e9e940009f1daf80ae15de", size = 293643693 },
 ]
 
 [[package]]