From 7ce8d142f4f16a4eabf97222c598263bb0b5696c Mon Sep 17 00:00:00 2001 From: Yuki Huang Date: Mon, 14 Apr 2025 08:24:43 +0000 Subject: [PATCH 01/13] stream refit Signed-off-by: Yuki Huang dtensor state_dict Signed-off-by: Yuki Huang clean test code Signed-off-by: Yuki Huang improve time consuming: move loop to outside instead of yield Signed-off-by: Yuki Huang free the memory of last full_tensor in the refit process Signed-off-by: Yuki Huang fix unit test Signed-off-by: Yuki Huang rename held param Signed-off-by: Yuki Huang fix rebase Signed-off-by: Yuki Huang --- nemo_reinforcer/algorithms/grpo.py | 7 +- .../models/generation/vllm_backend.py | 18 ++--- .../models/policy/dtensor_policy_worker.py | 77 ++++++++---------- .../models/policy/fsdp1_policy_worker.py | 79 ++++++++++++------- nemo_reinforcer/models/policy/hf_policy.py | 16 +++- nemo_reinforcer/models/policy/utils.py | 4 + .../models/generation/test_vllm_generation.py | 21 +++-- 7 files changed, 125 insertions(+), 97 deletions(-) diff --git a/nemo_reinforcer/algorithms/grpo.py b/nemo_reinforcer/algorithms/grpo.py index 84e02b39a9..6d032a4a96 100644 --- a/nemo_reinforcer/algorithms/grpo.py +++ b/nemo_reinforcer/algorithms/grpo.py @@ -277,9 +277,12 @@ def refit_policy_generation( ): """Refit the policy generation interface with the latest policy weights.""" policy.offload_before_refit() - ipc_handles = policy.get_weights_ipc_handles() policy_generation.prepare_for_generation() - policy_generation.update_weights(ipc_handles) + # Streaming update weights to save memory + param_keys = policy.prepare_weights_for_ipc() + for key in param_keys: + ipc_handles = policy.get_weights_ipc_handles(key) + policy_generation.update_weights(ipc_handles) policy.offload_after_refit() diff --git a/nemo_reinforcer/models/generation/vllm_backend.py b/nemo_reinforcer/models/generation/vllm_backend.py index 1e5fa21a33..662fa7d21c 100644 --- a/nemo_reinforcer/models/generation/vllm_backend.py +++ 
b/nemo_reinforcer/models/generation/vllm_backend.py @@ -40,21 +40,19 @@ def update_weights_from_ipc_handles(self, ipc_handles): try: # Get handles for this device device_uuid = self.report_device_id() - handles = ipc_handles[device_uuid] + named_handle = ipc_handles[device_uuid] device_id = self.device.index - weights = [] # Process each handle to get the tensor - for name, handle in handles.items(): - func, args = handle - list_args = list(args) - # Update device ID to match the current device - list_args[6] = device_id - tensor = func(*list_args) - weights.append((name, tensor)) + name, handle = named_handle + func, args = handle + list_args = list(args) + # Update device ID to match the current device + list_args[6] = device_id + tensor = func(*list_args) # Load weights into the model - self.model_runner.model.load_weights(weights=weights) + self.model_runner.model.load_weights(weights=[(name, tensor)]) torch.cuda.synchronize() return True except Exception as e: diff --git a/nemo_reinforcer/models/policy/dtensor_policy_worker.py b/nemo_reinforcer/models/policy/dtensor_policy_worker.py index a7c7f717fb..a0bc159ad0 100644 --- a/nemo_reinforcer/models/policy/dtensor_policy_worker.py +++ b/nemo_reinforcer/models/policy/dtensor_policy_worker.py @@ -174,7 +174,9 @@ def __init__( if self.cpu_offload: self.model = self.move_buffer_to_device(self.model, "cpu") - self._held_model_params = None + # used for streaming update inference engine weights + self._held_sharded_state_dict_reference = None + self._held_single_streamed_param_reference = None if init_reference_model: self.reference_model_state_dict = get_cpu_state_dict( @@ -235,6 +237,9 @@ def __init__( def is_alive(self): return True + def reset_peak_memory_stats(self): + torch.cuda.reset_peak_memory_stats() + def get_gpu_info(self): """Return information about the GPU being used by this worker.""" return get_gpu_info(self.model) @@ -533,50 +538,34 @@ def report_device_id(self) -> str: return 
get_device_uuid(device_idx) @torch.no_grad() - def get_weight_ipc_handles(self, offload_model=True): - from torch.multiprocessing.reductions import reduce_tensor - + def prepare_weights_for_ipc(self): self.model = self.move_to_cuda(self.model) - params = self.model.state_dict() - - # Create a copy of parameters in the desired dtype (bfloat16 or float32) - dtype_params = {} - for name, param in params.items(): - if isinstance(param, DTensor): - param = param.full_tensor() - - # Convert parameters to the configured dtype - dtype_params[name] = param.to( - device="cuda", dtype=self.dtype, non_blocking=True - ) - - for name, buffer in self.model.named_buffers(): - if isinstance(buffer, DTensor): - buffer = buffer.full_tensor() - - dtype_params[name] = buffer.to( - device="cuda", dtype=self.dtype, non_blocking=True - ) - - torch.cuda.synchronize() - - # Replace the original params with the converted ones - params = dtype_params + self._held_sharded_state_dict_reference = self.model.state_dict() + return self._held_sharded_state_dict_reference.keys() - # hold on to the params so we can explicitly delete them after refit - self._held_model_params = params + @torch.no_grad() + def get_weights_ipc_handles(self, key): + from torch.multiprocessing.reductions import reduce_tensor - data = {} + # Get device UUID for IPC device_uuid = self.report_device_id() - for name, p in params.items(): - data[name] = reduce_tensor(p.detach()) - if offload_model or self.cpu_offload: - self.model = self.move_to_cpu(self.model) - gc.collect() - torch.cuda.empty_cache() + # Get full_tensor for dtensor (GPU > 1) + tensor = self._held_sharded_state_dict_reference[key] + if isinstance(tensor, DTensor): + full_tensor = tensor.full_tensor() + else: + full_tensor = tensor + + # Convert parameters to the configured dtype + full_tensor = full_tensor.to(self.dtype, non_blocking=True) + # Temporary record the full tensor for cleanup + # It is needed for cleanup the last full_tensor in the refit process + 
self._held_single_streamed_param_reference = full_tensor - return {device_uuid: data} + # Create a handle for the tensor + handle = reduce_tensor(full_tensor.detach()) + return {device_uuid: (key, handle)} def prepare_for_lp_inference(self): if not self.cpu_offload: @@ -634,9 +623,13 @@ def offload_after_refit(self): torch.randn(1).cuda() # wake up torch allocator self.offload_before_refit() # rerun the old offload function - if self._held_model_params is not None: - del self._held_model_params - self._held_model_params = None + # Clean up the held tensors + if self._held_sharded_state_dict_reference is not None: + del self._held_sharded_state_dict_reference + self._held_sharded_state_dict_reference = None + if self._held_single_streamed_param_reference is not None: + del self._held_single_streamed_param_reference + self._held_single_streamed_param_reference = None gc.collect() torch.cuda.empty_cache() diff --git a/nemo_reinforcer/models/policy/fsdp1_policy_worker.py b/nemo_reinforcer/models/policy/fsdp1_policy_worker.py index c06d738929..1aa49c0787 100644 --- a/nemo_reinforcer/models/policy/fsdp1_policy_worker.py +++ b/nemo_reinforcer/models/policy/fsdp1_policy_worker.py @@ -149,7 +149,11 @@ def do_fsdp(model): self.reference_model = do_fsdp(self.reference_model) self.reference_model = self.manual_offload_to_cpu(self.reference_model) self.model = self.manual_load_to_gpu(self.model) - self._held_reference_model_params = None + + # used for streaming update inference engine weights + self._held_sharded_state_dict_reference = None + self._held_single_streamed_param_reference = None + # register_fsdp_forward_method(self.model, "generate") if init_optimizer: optimizer_cls = import_class_from_path(self.cfg["optimizer"]["name"]) @@ -205,6 +209,9 @@ def do_fsdp(model): def is_alive(self): return True + def reset_peak_memory_stats(self): + torch.cuda.reset_peak_memory_stats() + def get_gpu_info(self): """Return information about the GPU being used by this worker.""" return 
get_gpu_info(self.model) @@ -689,38 +696,48 @@ def report_device_id(self) -> str: return get_device_uuid(device_idx) @torch.no_grad() - def get_weight_ipc_handles(self, offload_model=True): - from torch.multiprocessing.reductions import reduce_tensor + def prepare_weights_for_ipc(self): + from torch.distributed.fsdp.api import ShardedStateDictConfig, StateDictType # If the model is not FSDP, then we need to manually move it to the GPU # For an FSDP model, model.state_dict() will move the params to the GPU - if not isinstance(self.model, torch.distributed.fsdp.FullyShardedDataParallel): + if not isinstance(self.model, FullyShardedDataParallel): self.model = self.manual_load_to_gpu(self.model) + self._held_sharded_state_dict_reference = self.model.state_dict() + else: + # Get sharded state dict instead of full state dict for FSDP1 + with FullyShardedDataParallel.state_dict_type( + self.model, + state_dict_type=StateDictType.SHARDED_STATE_DICT, + state_dict_config=ShardedStateDictConfig(), + ): + self._held_sharded_state_dict_reference = self.model.state_dict() + return self._held_sharded_state_dict_reference.keys() + + @torch.no_grad() + def get_weights_ipc_handles(self, key): + from torch.distributed.tensor import DTensor + from torch.multiprocessing.reductions import reduce_tensor - # TODO @sahilj: do this without an allgather (maybe FSDP2) - params = self.model.state_dict() - - # Create a copy of parameters in the desired dtype (bfloat16 or float32) - dtype_params = {} - for name, param in params.items(): - # Convert parameters to the configured dtype - dtype_params[name] = param.to(self.dtype, non_blocking=True) - - # Replace the original params with the converted ones - params = dtype_params - # For FSDP1, params may get GC'ed before sending to vllm, - # so we need to hold a reference to them - self._held_reference_model_params = params - data = {} + # Get device UUID for IPC device_uuid = self.report_device_id() - for name, p in params.items(): - data[name] = 
reduce_tensor(p.detach()) - if offload_model: - self.model = self.manual_offload_to_cpu(self.model) - gc.collect() - torch.cuda.empty_cache() - return {device_uuid: data} + # Get full_tensor for dtensor (GPU > 1) + tensor = self._held_sharded_state_dict_reference[key] + if isinstance(tensor, DTensor): + full_tensor = tensor.full_tensor() + else: + full_tensor = tensor + + # Convert parameters to the configured dtype + full_tensor = full_tensor.to(self.dtype, non_blocking=True) + # Temporary record the full tensor for cleanup + # It is needed for cleanup the last full_tensor in the refit process + self._held_single_streamed_param_reference = full_tensor + + # Create a handle for the tensor + handle = reduce_tensor(full_tensor.detach()) + return {device_uuid: (key, handle)} def prepare_for_lp_inference(self): self.model = self.manual_load_to_gpu(self.model) @@ -771,9 +788,13 @@ def offload_after_refit(self): torch.randn(1).cuda() # wake up torch allocator self.offload_before_refit() # rerun the old offload function - if self._held_reference_model_params is not None: - del self._held_reference_model_params - self._held_reference_model_params = None + # Clean up the held tensors + if self._held_sharded_state_dict_reference is not None: + del self._held_sharded_state_dict_reference + self._held_sharded_state_dict_reference = None + if self._held_single_streamed_param_reference is not None: + del self._held_single_streamed_param_reference + self._held_single_streamed_param_reference = None gc.collect() torch.cuda.empty_cache() diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py index e4fea94363..a0c6ef2945 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_reinforcer/models/policy/hf_policy.py @@ -250,7 +250,19 @@ def finish_training(self, *args, **kwargs): # Placeholder implementation pass - def get_weights_ipc_handles(self): + def prepare_weights_for_ipc(self): + """Prepare the weights for IPC. 
+ + Returns: + dict: A dictionary containing the keys of the parameters. + """ + futures = self.worker_group.run_all_workers_single_data( + "prepare_weights_for_ipc", only_on="all_tied_workers" + ) + # only get the first worker's result is enough since all workers will have the same result + return ray.get(futures)[0] + + def get_weights_ipc_handles(self, key): """Fetch weight IPC handles from all workers. Returns: @@ -259,7 +271,7 @@ def get_weights_ipc_handles(self): # Collect IPC handles from all workers worker_handles = ray.get( [ - worker.get_weight_ipc_handles.remote() + worker.get_weights_ipc_handles.remote(key) for worker in self.worker_group.workers ] ) diff --git a/nemo_reinforcer/models/policy/utils.py b/nemo_reinforcer/models/policy/utils.py index 0c249a7860..af2e84245e 100644 --- a/nemo_reinforcer/models/policy/utils.py +++ b/nemo_reinforcer/models/policy/utils.py @@ -45,6 +45,8 @@ def get_gpu_info(model): device_count = torch.cuda.device_count() memory_allocated = torch.cuda.memory_allocated(device) / (1024**2) # in MB memory_reserved = torch.cuda.memory_reserved(device) / (1024**2) # in MB + peak_memory = torch.cuda.max_memory_allocated() / (1024**2) # in MB + peak_reserved = torch.cuda.max_memory_reserved() / (1024**2) # in MB # Try to get the real global device ID (not the local one) # In distributed training, each process only sees its assigned GPU as device 0 @@ -83,6 +85,8 @@ def get_gpu_info(model): "device_name": device_name, "memory_allocated_mb": memory_allocated, "memory_reserved_mb": memory_reserved, + "peak_memory_allocated_mb": peak_memory, + "peak_memory_reserved_mb": peak_reserved, "parameter_sample": param_info, "env_vars": { k: v diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 04e0cd5969..a11981f9ae 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -18,6 +18,7 @@ import torch 
import ray +from nemo_reinforcer.algorithms.grpo import refit_policy_generation from nemo_reinforcer.algorithms.utils import get_tokenizer from nemo_reinforcer.distributed.virtual_cluster import RayVirtualCluster from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict @@ -270,9 +271,7 @@ def test_vllm_worker_seed_behavior(cluster, tokenizer): hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") - ipc_handles = hf_policy.get_weights_ipc_handles() - policy.prepare_for_generation() - policy.update_weights(ipc_handles) + refit_policy_generation(hf_policy, policy) try: # Generate with duplicated prompts @@ -435,9 +434,7 @@ def test_vllm_generation_with_hf_training(cluster, tokenizer, enable_dtensor): hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") - ipc_handles = hf_policy.get_weights_ipc_handles() - vllm_policy.prepare_for_generation() - vllm_policy.update_weights(ipc_handles) + refit_policy_generation(hf_policy, vllm_policy) # Step 1: Use vLLM for generation print("Using vLLM policy for fast generation...") @@ -709,9 +706,11 @@ def test_vllm_weight_update_and_prefix_cache_reset( ) print("Updating vLLM weights from HF policy...") - ipc_handles = hf_policy.get_weights_ipc_handles() - update_success = vllm_policy.update_weights(ipc_handles) - assert update_success, "Weight update should succeed" + param_keys = hf_policy.prepare_weights_for_ipc() + for key in param_keys: + ipc_handles = hf_policy.get_weights_ipc_handles(key) + update_success = vllm_policy.update_weights(ipc_handles) + assert update_success, "Weight update should succeed" print("vLLM weights successfully updated.") print("Running Generation 2 (Weights Updated, Cache Still Active)...") @@ -785,9 +784,7 @@ def test_vllm_generation_with_stop( hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") - ipc_handles = hf_policy.get_weights_ipc_handles() - 
vllm_generation.prepare_for_generation() - vllm_generation.update_weights(ipc_handles) + refit_policy_generation(hf_policy, vllm_generation) # test generate outputs = vllm_generation.generate(test_input_data, greedy=True) From d9134a89e7a276ac8c817bf799ec813408f9d74d Mon Sep 17 00:00:00 2001 From: Yuki Huang Date: Thu, 17 Apr 2025 07:05:28 +0000 Subject: [PATCH 02/13] add unit test Signed-off-by: Yuki Huang --- .../models/generation/test_vllm_generation.py | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index a11981f9ae..8d17ef11fa 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -748,6 +748,64 @@ def test_vllm_weight_update_and_prefix_cache_reset( torch.cuda.empty_cache() +def test_vllm_weight_update_memory(cluster, tokenizer): + """Test that vLLM streaming weight update and can save memory.""" + from nemo_reinforcer.models.policy.hf_policy import HfPolicy + + if cluster.num_gpus_per_node < 2: + pytest.skip("Need at least 2 GPUs per node for this test") + + # Create separate configs for each policy + vllm_config = basic_vllm_test_config.copy() + vllm_config = configure_generation_config(vllm_config, tokenizer, is_eval=False) + + # Ensure we can get same peak memory + assert vllm_config["model_name"] == "meta-llama/Llama-3.2-1B", ( + "Model name should be meta-llama/Llama-3.2-1B to get expected peak memory" + ) + + # Create policies + print("Creating vLLM policy...") + vllm_policy = VllmGeneration(cluster, vllm_config) + vllm_policy.finish_generation() + + print("Creating HF policy...") + hf_config = basic_hf_test_config.copy() + hf_policy = HfPolicy(cluster, hf_config, tokenizer) + + print(f"refitting vllm policy...") + # take it outside statistics to get clean peak memory during refit + hf_policy.offload_before_refit() + # reset peak memory stats before 
refit + workers = hf_policy.worker_group.workers + ray.get([w.reset_peak_memory_stats.remote() for w in workers]) + refit_policy_generation(hf_policy, vllm_policy) + gpu_infos = ray.get([w.get_gpu_info.remote() for w in workers]) + + # Gather memory stats + current_allocated = 0.0 + current_reserved = 0.0 + peak_allocated = 0.0 + peak_reserved = 0.0 + for status in gpu_infos: + current_allocated = max(current_allocated, status["memory_allocated_mb"]) + current_reserved = max(current_reserved, status["memory_reserved_mb"]) + peak_allocated = max(peak_allocated, status["peak_memory_allocated_mb"]) + peak_reserved = max(peak_reserved, status["peak_memory_reserved_mb"]) + + # Check memory stats + assert current_allocated == 0.0, "Memory should be 0 after refit completed" + assert current_reserved == 0.0, "Memory should be 0 after refit completed" + # memory threshold: memory during non-streaming weight update on 1B model on 2 GPUs + # memory during streaming weight update should less than this baseline threshold + assert peak_allocated < 11286, "Peak allocated memory should be less than 11286MB" + assert peak_reserved < 11298, "Peak reserved memory should be less than 11298MB" + + # Clean up + vllm_policy.shutdown() + hf_policy.shutdown() + + @pytest.mark.parametrize("is_eval", [True, False]) @pytest.mark.parametrize("enable_dtensor", [True, False]) def test_vllm_generation_with_stop( From a595eacc29fa1f41172a36be88f73480bf58bd0f Mon Sep 17 00:00:00 2001 From: Yuki Huang Date: Thu, 17 Apr 2025 20:23:31 -0700 Subject: [PATCH 03/13] upgrade vllm to 0.8.3 Signed-off-by: Yuki Huang update uv Signed-off-by: Yuki Huang --- pyproject.toml | 2 +- uv.lock | 178 ++++++++++++++++++++++++++++++------------------- 2 files changed, 111 insertions(+), 69 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b7f8260ff8..4b3e064a4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ readme = {file = "README.md", content-type = "text/markdown"} 
[project.optional-dependencies] vllm = [ - "vllm==0.8.2", + "vllm==0.8.3", ] [dependency-groups] diff --git a/uv.lock b/uv.lock index d546f25e64..c5289a2bed 100644 --- a/uv.lock +++ b/uv.lock @@ -1088,6 +1088,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, ] +[[package]] +name = "hf-xet" +version = "1.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/68/4c363b2e62cb3dbe12d2257ba9b22f101384692d4b9727c5f72433472cff/hf_xet-1.0.3.tar.gz", hash = "sha256:a6d16861a06dd4b8f7229c16b392c5fb8b9588ced89a6ee9bc3e66227f794353", size = 257227 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/12/ebbba4b64cb9c908bd5dee355da27f3cc5ad4f29b4b2835041d363388363/hf_xet-1.0.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:0705e5db0da5794ab048a8662a7b3aba220f963270b26abc92e8d05abca22451", size = 4979740 }, + { url = "https://files.pythonhosted.org/packages/58/8f/34eadc408b834bcb55886b242a9783da3f63508c4bcbfda7a4f21e61f3d1/hf_xet-1.0.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:09a9565ca84049d48c99c83a82d08fbc21d63c04811fd2f7dd088292c1185bc5", size = 4806773 }, + { url = "https://files.pythonhosted.org/packages/a1/de/00b2e2568a39c01b0e013db3300f4d5841f2e597d7b0518923c7881bd166/hf_xet-1.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70e18534d46ab92bbc3125addaebc145f9b27e06eecd67b40c4342f4b92b677f", size = 53812632 }, + { url = "https://files.pythonhosted.org/packages/e2/d8/4ff790370a6795418196553c33e7bcceaa73a7d587e21e4ccb7661b54a2a/hf_xet-1.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:da28fd32213ad5b8f60771aba44ac032ba19d752928cfd95914f09146b3f51ec", size = 52277180 }, + { url = 
"https://files.pythonhosted.org/packages/83/dd/7b432918a3e9e09794674b81e852acc6e14177c0a4466ac0566b7e7f47a4/hf_xet-1.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1b71118b8f7e9edf1ae56282388794f351163c7de5c22ea3737dffa9313f500e", size = 53309852 }, + { url = "https://files.pythonhosted.org/packages/4d/a2/d7a5f452a3a8faaa82aeb3aceddab2e103c1b7028a00bbc4caebca5d79fe/hf_xet-1.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5927d1986f87b7b80616eb6353a1402be1d72c46b6b0709b01ffc7623a159563", size = 53739471 }, + { url = "https://files.pythonhosted.org/packages/82/81/966f800933043c0be989306f5224ef058543f7848f1e78d7ef3305bd069a/hf_xet-1.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:014b5a40e62ad334f21513e5ba39b419117396031e9264dfc15dd598a1595029", size = 4123538 }, +] + [[package]] name = "httpcore" version = "1.0.7" @@ -1154,7 +1169,7 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "0.29.3" +version = "0.30.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -1165,9 +1180,14 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e5/f9/851f34b02970e8143d41d4001b2d49e54ef113f273902103823b8bc95ada/huggingface_hub-0.29.3.tar.gz", hash = "sha256:64519a25716e0ba382ba2d3fb3ca082e7c7eb4a2fc634d200e8380006e0760e5", size = 390123 } +sdist = { url = "https://files.pythonhosted.org/packages/df/22/8eb91736b1dcb83d879bd49050a09df29a57cc5cd9f38e48a4b1c45ee890/huggingface_hub-0.30.2.tar.gz", hash = "sha256:9a7897c5b6fd9dad3168a794a8998d6378210f5b9688d0dfc180b1a228dc2466", size = 400868 } wheels = [ - { url = "https://files.pythonhosted.org/packages/40/0c/37d380846a2e5c9a3c6a73d26ffbcfdcad5fc3eacf42fdf7cff56f2af634/huggingface_hub-0.29.3-py3-none-any.whl", hash = "sha256:0b25710932ac649c08cdbefa6c6ccb8e88eef82927cacdb048efb726429453aa", size = 468997 }, + { url = 
"https://files.pythonhosted.org/packages/93/27/1fb384a841e9661faad1c31cbfa62864f59632e876df5d795234da51c395/huggingface_hub-0.30.2-py3-none-any.whl", hash = "sha256:68ff05969927058cfa41df4f2155d4bb48f5f54f719dd0390103eefa9b191e28", size = 481433 }, +] + +[package.optional-dependencies] +hf-xet = [ + { name = "hf-xet" }, ] [[package]] @@ -1376,25 +1396,30 @@ wheels = [ [[package]] name = "llvmlite" -version = "0.43.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9f/3d/f513755f285db51ab363a53e898b85562e950f79a2e6767a364530c2f645/llvmlite-0.43.0.tar.gz", hash = "sha256:ae2b5b5c3ef67354824fb75517c8db5fbe93bc02cd9671f3c62271626bc041d5", size = 157069 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/23/ff/6ca7e98998b573b4bd6566f15c35e5c8bea829663a6df0c7aa55ab559da9/llvmlite-0.43.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a289af9a1687c6cf463478f0fa8e8aa3b6fb813317b0d70bf1ed0759eab6f761", size = 31064408 }, - { url = "https://files.pythonhosted.org/packages/ca/5c/a27f9257f86f0cda3f764ff21d9f4217b9f6a0d45e7a39ecfa7905f524ce/llvmlite-0.43.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d4fd101f571a31acb1559ae1af30f30b1dc4b3186669f92ad780e17c81e91bc", size = 28793153 }, - { url = "https://files.pythonhosted.org/packages/7e/3c/4410f670ad0a911227ea2ecfcba9f672a77cf1924df5280c4562032ec32d/llvmlite-0.43.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d434ec7e2ce3cc8f452d1cd9a28591745de022f931d67be688a737320dfcead", size = 42857276 }, - { url = "https://files.pythonhosted.org/packages/c6/21/2ffbab5714e72f2483207b4a1de79b2eecd9debbf666ff4e7067bcc5c134/llvmlite-0.43.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6912a87782acdff6eb8bf01675ed01d60ca1f2551f8176a300a886f09e836a6a", size = 43871781 }, - { url = 
"https://files.pythonhosted.org/packages/f2/26/b5478037c453554a61625ef1125f7e12bb1429ae11c6376f47beba9b0179/llvmlite-0.43.0-cp310-cp310-win_amd64.whl", hash = "sha256:14f0e4bf2fd2d9a75a3534111e8ebeb08eda2f33e9bdd6dfa13282afacdde0ed", size = 28123487 }, - { url = "https://files.pythonhosted.org/packages/95/8c/de3276d773ab6ce3ad676df5fab5aac19696b2956319d65d7dd88fb10f19/llvmlite-0.43.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3e8d0618cb9bfe40ac38a9633f2493d4d4e9fcc2f438d39a4e854f39cc0f5f98", size = 31064409 }, - { url = "https://files.pythonhosted.org/packages/ee/e1/38deed89ced4cf378c61e232265cfe933ccde56ae83c901aa68b477d14b1/llvmlite-0.43.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0a9a1a39d4bf3517f2af9d23d479b4175ead205c592ceeb8b89af48a327ea57", size = 28793149 }, - { url = "https://files.pythonhosted.org/packages/2f/b2/4429433eb2dc8379e2cb582502dca074c23837f8fd009907f78a24de4c25/llvmlite-0.43.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1da416ab53e4f7f3bc8d4eeba36d801cc1894b9fbfbf2022b29b6bad34a7df2", size = 42857277 }, - { url = "https://files.pythonhosted.org/packages/6b/99/5d00a7d671b1ba1751fc9f19d3b36f3300774c6eebe2bcdb5f6191763eb4/llvmlite-0.43.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977525a1e5f4059316b183fb4fd34fa858c9eade31f165427a3977c95e3ee749", size = 43871781 }, - { url = "https://files.pythonhosted.org/packages/20/ab/ed5ed3688c6ba4f0b8d789da19fd8e30a9cf7fc5852effe311bc5aefe73e/llvmlite-0.43.0-cp311-cp311-win_amd64.whl", hash = "sha256:d5bd550001d26450bd90777736c69d68c487d17bf371438f975229b2b8241a91", size = 28107433 }, - { url = "https://files.pythonhosted.org/packages/0b/67/9443509e5d2b6d8587bae3ede5598fa8bd586b1c7701696663ea8af15b5b/llvmlite-0.43.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f99b600aa7f65235a5a05d0b9a9f31150c390f31261f2a0ba678e26823ec38f7", size = 31064409 }, - { url = 
"https://files.pythonhosted.org/packages/a2/9c/24139d3712d2d352e300c39c0e00d167472c08b3bd350c3c33d72c88ff8d/llvmlite-0.43.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:35d80d61d0cda2d767f72de99450766250560399edc309da16937b93d3b676e7", size = 28793145 }, - { url = "https://files.pythonhosted.org/packages/bf/f1/4c205a48488e574ee9f6505d50e84370a978c90f08dab41a42d8f2c576b6/llvmlite-0.43.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eccce86bba940bae0d8d48ed925f21dbb813519169246e2ab292b5092aba121f", size = 42857276 }, - { url = "https://files.pythonhosted.org/packages/00/5f/323c4d56e8401c50185fd0e875fcf06b71bf825a863699be1eb10aa2a9cb/llvmlite-0.43.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df6509e1507ca0760787a199d19439cc887bfd82226f5af746d6977bd9f66844", size = 43871781 }, - { url = "https://files.pythonhosted.org/packages/c6/94/dea10e263655ce78d777e78d904903faae39d1fc440762be4a9dc46bed49/llvmlite-0.43.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a2872ee80dcf6b5dbdc838763d26554c2a18aa833d31a2635bff16aafefb9c9", size = 28107442 }, +version = "0.44.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/89/6a/95a3d3610d5c75293d5dbbb2a76480d5d4eeba641557b69fe90af6c5b84e/llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4", size = 171880 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/75/d4863ddfd8ab5f6e70f4504cf8cc37f4e986ec6910f4ef8502bb7d3c1c71/llvmlite-0.44.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9fbadbfba8422123bab5535b293da1cf72f9f478a65645ecd73e781f962ca614", size = 28132306 }, + { url = "https://files.pythonhosted.org/packages/37/d9/6e8943e1515d2f1003e8278819ec03e4e653e2eeb71e4d00de6cfe59424e/llvmlite-0.44.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cccf8eb28f24840f2689fb1a45f9c0f7e582dd24e088dcf96e424834af11f791", size = 26201096 }, + { 
url = "https://files.pythonhosted.org/packages/aa/46/8ffbc114def88cc698906bf5acab54ca9fdf9214fe04aed0e71731fb3688/llvmlite-0.44.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7202b678cdf904823c764ee0fe2dfe38a76981f4c1e51715b4cb5abb6cf1d9e8", size = 42361859 }, + { url = "https://files.pythonhosted.org/packages/30/1c/9366b29ab050a726af13ebaae8d0dff00c3c58562261c79c635ad4f5eb71/llvmlite-0.44.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:40526fb5e313d7b96bda4cbb2c85cd5374e04d80732dd36a282d72a560bb6408", size = 41184199 }, + { url = "https://files.pythonhosted.org/packages/69/07/35e7c594b021ecb1938540f5bce543ddd8713cff97f71d81f021221edc1b/llvmlite-0.44.0-cp310-cp310-win_amd64.whl", hash = "sha256:41e3839150db4330e1b2716c0be3b5c4672525b4c9005e17c7597f835f351ce2", size = 30332381 }, + { url = "https://files.pythonhosted.org/packages/b5/e2/86b245397052386595ad726f9742e5223d7aea999b18c518a50e96c3aca4/llvmlite-0.44.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:eed7d5f29136bda63b6d7804c279e2b72e08c952b7c5df61f45db408e0ee52f3", size = 28132305 }, + { url = "https://files.pythonhosted.org/packages/ff/ec/506902dc6870249fbe2466d9cf66d531265d0f3a1157213c8f986250c033/llvmlite-0.44.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ace564d9fa44bb91eb6e6d8e7754977783c68e90a471ea7ce913bff30bd62427", size = 26201090 }, + { url = "https://files.pythonhosted.org/packages/99/fe/d030f1849ebb1f394bb3f7adad5e729b634fb100515594aca25c354ffc62/llvmlite-0.44.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5d22c3bfc842668168a786af4205ec8e3ad29fb1bc03fd11fd48460d0df64c1", size = 42361858 }, + { url = "https://files.pythonhosted.org/packages/d7/7a/ce6174664b9077fc673d172e4c888cb0b128e707e306bc33fff8c2035f0d/llvmlite-0.44.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f01a394e9c9b7b1d4e63c327b096d10f6f0ed149ef53d38a09b3749dcf8c9610", size = 41184200 }, + { 
url = "https://files.pythonhosted.org/packages/5f/c6/258801143975a6d09a373f2641237992496e15567b907a4d401839d671b8/llvmlite-0.44.0-cp311-cp311-win_amd64.whl", hash = "sha256:d8489634d43c20cd0ad71330dde1d5bc7b9966937a263ff1ec1cebb90dc50955", size = 30331193 }, + { url = "https://files.pythonhosted.org/packages/15/86/e3c3195b92e6e492458f16d233e58a1a812aa2bfbef9bdd0fbafcec85c60/llvmlite-0.44.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad", size = 28132297 }, + { url = "https://files.pythonhosted.org/packages/d6/53/373b6b8be67b9221d12b24125fd0ec56b1078b660eeae266ec388a6ac9a0/llvmlite-0.44.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db", size = 26201105 }, + { url = "https://files.pythonhosted.org/packages/cb/da/8341fd3056419441286c8e26bf436923021005ece0bff5f41906476ae514/llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9", size = 42361901 }, + { url = "https://files.pythonhosted.org/packages/53/ad/d79349dc07b8a395a99153d7ce8b01d6fcdc9f8231355a5df55ded649b61/llvmlite-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d", size = 41184247 }, + { url = "https://files.pythonhosted.org/packages/e2/3b/a9a17366af80127bd09decbe2a54d8974b6d8b274b39bf47fbaedeec6307/llvmlite-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1", size = 30332380 }, + { url = "https://files.pythonhosted.org/packages/89/24/4c0ca705a717514c2092b18476e7a12c74d34d875e05e4d742618ebbf449/llvmlite-0.44.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:319bddd44e5f71ae2689859b7203080716448a3cd1128fb144fe5c055219d516", size = 28132306 }, + { url = 
"https://files.pythonhosted.org/packages/01/cf/1dd5a60ba6aee7122ab9243fd614abcf22f36b0437cbbe1ccf1e3391461c/llvmlite-0.44.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c58867118bad04a0bb22a2e0068c693719658105e40009ffe95c7000fcde88e", size = 26201090 }, + { url = "https://files.pythonhosted.org/packages/d2/1b/656f5a357de7135a3777bd735cc7c9b8f23b4d37465505bd0eaf4be9befe/llvmlite-0.44.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46224058b13c96af1365290bdfebe9a6264ae62fb79b2b55693deed11657a8bf", size = 42361904 }, + { url = "https://files.pythonhosted.org/packages/d8/e1/12c5f20cb9168fb3464a34310411d5ad86e4163c8ff2d14a2b57e5cc6bac/llvmlite-0.44.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0097052c32bf721a4efc03bd109d335dfa57d9bffb3d4c24cc680711b8b4fc", size = 41184245 }, + { url = "https://files.pythonhosted.org/packages/d0/81/e66fc86539293282fd9cb7c9417438e897f369e79ffb62e1ae5e5154d4dd/llvmlite-0.44.0-cp313-cp313-win_amd64.whl", hash = "sha256:2fb7c4f2fb86cbae6dca3db9ab203eeea0e22d73b99bc2341cdf9de93612e930", size = 30331193 }, ] [[package]] @@ -1768,6 +1793,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d", size = 84579 }, ] +[[package]] +name = "nanobind" +version = "2.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/7d/f77f2bc2e2a210502a164556f8a742cd0f72f39061b97cb9d73bbd3ff0ab/nanobind-2.7.0.tar.gz", hash = "sha256:f9f1b160580c50dcf37b6495a0fd5ec61dc0d95dae5f8004f87dd9ad7eb46b34", size = 976093 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/14/989883082b395146120d34ca7e484a2b24cb73b0e428576a3a4249bd4082/nanobind-2.7.0-py3-none-any.whl", hash = 
"sha256:73b12d0e751d140d6c1bf4b215e18818a8debfdb374f08dc3776ad208d808e74", size = 241690 }, +] + [[package]] name = "nemo-reinforcer" source = { editable = "." } @@ -1836,7 +1870,7 @@ requires-dist = [ { name = "torch", specifier = "==2.6.0" }, { name = "torchdata" }, { name = "transformers" }, - { name = "vllm", marker = "extra == 'vllm'", specifier = "==0.8.2" }, + { name = "vllm", marker = "extra == 'vllm'", specifier = "==0.8.3" }, { name = "wandb" }, ] provides-extras = ["vllm"] @@ -1919,29 +1953,34 @@ wheels = [ [[package]] name = "numba" -version = "0.60.0" +version = "0.61.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "llvmlite" }, { name = "numpy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3c/93/2849300a9184775ba274aba6f82f303343669b0592b7bb0849ea713dabb0/numba-0.60.0.tar.gz", hash = "sha256:5df6158e5584eece5fc83294b949fd30b9f1125df7708862205217e068aabf16", size = 2702171 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/cf/baa13a7e3556d73d9e38021e6d6aa4aeb30d8b94545aa8b70d0f24a1ccc4/numba-0.60.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d761de835cd38fb400d2c26bb103a2726f548dc30368853121d66201672e651", size = 2647627 }, - { url = "https://files.pythonhosted.org/packages/ac/ba/4b57fa498564457c3cc9fc9e570a6b08e6086c74220f24baaf04e54b995f/numba-0.60.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:159e618ef213fba758837f9837fb402bbe65326e60ba0633dbe6c7f274d42c1b", size = 2650322 }, - { url = "https://files.pythonhosted.org/packages/28/98/7ea97ee75870a54f938a8c70f7e0be4495ba5349c5f9db09d467c4a5d5b7/numba-0.60.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1527dc578b95c7c4ff248792ec33d097ba6bef9eda466c948b68dfc995c25781", size = 3407390 }, - { url = "https://files.pythonhosted.org/packages/79/58/cb4ac5b8f7ec64200460aef1fed88258fb872ceef504ab1f989d2ff0f684/numba-0.60.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:fe0b28abb8d70f8160798f4de9d486143200f34458d34c4a214114e445d7124e", size = 3699694 }, - { url = "https://files.pythonhosted.org/packages/1c/b0/c61a93ca947d12233ff45de506ddbf52af3f752066a0b8be4d27426e16da/numba-0.60.0-cp310-cp310-win_amd64.whl", hash = "sha256:19407ced081d7e2e4b8d8c36aa57b7452e0283871c296e12d798852bc7d7f198", size = 2687030 }, - { url = "https://files.pythonhosted.org/packages/98/ad/df18d492a8f00d29a30db307904b9b296e37507034eedb523876f3a2e13e/numba-0.60.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a17b70fc9e380ee29c42717e8cc0bfaa5556c416d94f9aa96ba13acb41bdece8", size = 2647254 }, - { url = "https://files.pythonhosted.org/packages/9a/51/a4dc2c01ce7a850b8e56ff6d5381d047a5daea83d12bad08aa071d34b2ee/numba-0.60.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3fb02b344a2a80efa6f677aa5c40cd5dd452e1b35f8d1c2af0dfd9ada9978e4b", size = 2649970 }, - { url = "https://files.pythonhosted.org/packages/f9/4c/8889ac94c0b33dca80bed11564b8c6d9ea14d7f094e674c58e5c5b05859b/numba-0.60.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f4fde652ea604ea3c86508a3fb31556a6157b2c76c8b51b1d45eb40c8598703", size = 3412492 }, - { url = "https://files.pythonhosted.org/packages/57/03/2b4245b05b71c0cee667e6a0b51606dfa7f4157c9093d71c6b208385a611/numba-0.60.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4142d7ac0210cc86432b818338a2bc368dc773a2f5cf1e32ff7c5b378bd63ee8", size = 3705018 }, - { url = "https://files.pythonhosted.org/packages/79/89/2d924ca60dbf949f18a6fec223a2445f5f428d9a5f97a6b29c2122319015/numba-0.60.0-cp311-cp311-win_amd64.whl", hash = "sha256:cac02c041e9b5bc8cf8f2034ff6f0dbafccd1ae9590dc146b3a02a45e53af4e2", size = 2686920 }, - { url = "https://files.pythonhosted.org/packages/eb/5c/b5ec752c475e78a6c3676b67c514220dbde2725896bbb0b6ec6ea54b2738/numba-0.60.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d7da4098db31182fc5ffe4bc42c6f24cd7d1cb8a14b59fd755bfee32e34b8404", size = 2647866 }, 
- { url = "https://files.pythonhosted.org/packages/65/42/39559664b2e7c15689a638c2a38b3b74c6e69a04e2b3019b9f7742479188/numba-0.60.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:38d6ea4c1f56417076ecf8fc327c831ae793282e0ff51080c5094cb726507b1c", size = 2650208 }, - { url = "https://files.pythonhosted.org/packages/67/88/c4459ccc05674ef02119abf2888ccd3e2fed12a323f52255f4982fc95876/numba-0.60.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:62908d29fb6a3229c242e981ca27e32a6e606cc253fc9e8faeb0e48760de241e", size = 3466946 }, - { url = "https://files.pythonhosted.org/packages/8b/41/ac11cf33524def12aa5bd698226ae196a1185831c05ed29dc0c56eaa308b/numba-0.60.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0ebaa91538e996f708f1ab30ef4d3ddc344b64b5227b67a57aa74f401bb68b9d", size = 3761463 }, - { url = "https://files.pythonhosted.org/packages/ca/bd/0fe29fcd1b6a8de479a4ed25c6e56470e467e3611c079d55869ceef2b6d1/numba-0.60.0-cp312-cp312-win_amd64.whl", hash = "sha256:f75262e8fe7fa96db1dca93d53a194a38c46da28b112b8a4aca168f0df860347", size = 2707588 }, +sdist = { url = "https://files.pythonhosted.org/packages/3c/88/c13a935f200fda51384411e49840a8e7f70c9cb1ee8d809dd0f2477cf7ef/numba-0.61.0.tar.gz", hash = "sha256:888d2e89b8160899e19591467e8fdd4970e07606e1fbc248f239c89818d5f925", size = 2816484 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/97/8568a025b9ab8b4d53491e70d4206d5f3fc71fbe94f3097058e01ad8e7ff/numba-0.61.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9cab9783a700fa428b1a54d65295122bc03b3de1d01fb819a6b9dbbddfdb8c43", size = 2769008 }, + { url = "https://files.pythonhosted.org/packages/8c/ab/a88c20755f66543ee01c85c98b866595b92e1bd0ed80565a4889e22929a8/numba-0.61.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:46c5ae094fb3706f5adf9021bfb7fc11e44818d61afee695cdee4eadfed45e98", size = 2771815 }, + { url = 
"https://files.pythonhosted.org/packages/ae/f4/b357913089ecec1a9ddc6adc04090396928f36a484a5ab9e71b24ddba4cd/numba-0.61.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6fb74e81aa78a2303e30593d8331327dfc0d2522b5db05ac967556a26db3ef87", size = 3820233 }, + { url = "https://files.pythonhosted.org/packages/ea/60/0e21bcf3baaf10e39d48cd224618e46a6b75d3394f465c37ce57bf98cbfa/numba-0.61.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0ebbd4827091384ab8c4615ba1b3ca8bc639a3a000157d9c37ba85d34cd0da1b", size = 3514707 }, + { url = "https://files.pythonhosted.org/packages/a0/08/45c136ab59e6b11e61ce15a0d17ef03fd89eaccb0db05ad67912aaf5218a/numba-0.61.0-cp310-cp310-win_amd64.whl", hash = "sha256:43aa4d7d10c542d3c78106b8481e0cbaaec788c39ee8e3d7901682748ffdf0b4", size = 2827753 }, + { url = "https://files.pythonhosted.org/packages/63/8f/f983a7c859ccad73d3cc3f86fbba94f16e137cd1ee464631d61b624363b2/numba-0.61.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:bf64c2d0f3d161af603de3825172fb83c2600bcb1d53ae8ea568d4c53ba6ac08", size = 2768960 }, + { url = "https://files.pythonhosted.org/packages/be/1b/c33dc847d475d5b647b4ad5aefc38df7a72283763f4cda47745050375a81/numba-0.61.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:de5aa7904741425f28e1028b85850b31f0a245e9eb4f7c38507fb893283a066c", size = 2771862 }, + { url = "https://files.pythonhosted.org/packages/14/91/18b9f64b34ff318a14d072251480547f89ebfb864b2b7168e5dc5f64f502/numba-0.61.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21c2fe25019267a608e2710a6a947f557486b4b0478b02e45a81cf606a05a7d4", size = 3825411 }, + { url = "https://files.pythonhosted.org/packages/f2/97/1a38030c2a331e273ace1de2b61988e33d80878fda8a5eedee0cd78399d3/numba-0.61.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:74250b26ed6a1428763e774dc5b2d4e70d93f73795635b5412b8346a4d054574", size = 3519604 }, + { url = 
"https://files.pythonhosted.org/packages/df/a7/56f547de8fc197963f238fd62beb5f1d2cace047602d0577956bf6840970/numba-0.61.0-cp311-cp311-win_amd64.whl", hash = "sha256:b72bbc8708e98b3741ad0c63f9929c47b623cc4ee86e17030a4f3e301e8401ac", size = 2827642 }, + { url = "https://files.pythonhosted.org/packages/63/c9/c61881e7f2e253e745209f078bbd428ce23b6cf901f7d93afe166720ff95/numba-0.61.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:152146ecdbb8d8176f294e9f755411e6f270103a11c3ff50cecc413f794e52c8", size = 2769758 }, + { url = "https://files.pythonhosted.org/packages/e1/28/ddec0147a4933f86ceaca580aa9bb767d5632ecdb1ece6cfb3eab4ac78e5/numba-0.61.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5cafa6095716fcb081618c28a8d27bf7c001e09696f595b41836dec114be2905", size = 2772445 }, + { url = "https://files.pythonhosted.org/packages/18/74/6a9f0e6c76c088f8a6aa702eab31734068061dca5cc0f34e8bc1eb447de1/numba-0.61.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ffe9fe373ed30638d6e20a0269f817b2c75d447141f55a675bfcf2d1fe2e87fb", size = 3882115 }, + { url = "https://files.pythonhosted.org/packages/53/68/d7c31e53f08e6b4669c9b5a3cd7c5fb9097220c5ef388bc099ca8ab9749f/numba-0.61.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9f25f7fef0206d55c1cfb796ad833cbbc044e2884751e56e798351280038484c", size = 3573296 }, + { url = "https://files.pythonhosted.org/packages/94/4f/8357a99a14f331b865a42cb4756ae37da85599b9c95e01277ea10361e91a/numba-0.61.0-cp312-cp312-win_amd64.whl", hash = "sha256:550d389573bc3b895e1ccb18289feea11d937011de4d278b09dc7ed585d1cdcb", size = 2828077 }, + { url = "https://files.pythonhosted.org/packages/3b/54/71fba18e4af5619f1ea8175ee92e82dd8e220bd6feb8c0153c6b814c8a60/numba-0.61.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:b96fafbdcf6f69b69855273e988696aae4974115a815f6818fef4af7afa1f6b8", size = 2768024 }, + { url = 
"https://files.pythonhosted.org/packages/39/76/2448b43d08e904aad1b1b9cd12835b19411e84a81aa9192f83642a5e0afd/numba-0.61.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f6c452dca1de8e60e593f7066df052dd8da09b243566ecd26d2b796e5d3087d", size = 2769541 }, + { url = "https://files.pythonhosted.org/packages/32/8f/4bb2374247ab988c9eac587b304b2947a36d605b9bb9ba4bf06e955c17d3/numba-0.61.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:44240e694d4aa321430c97b21453e46014fe6c7b8b7d932afa7f6a88cc5d7e5e", size = 3890102 }, + { url = "https://files.pythonhosted.org/packages/ab/bc/dc2d03555289ae5263f65c01d45eb186ce347585c191daf0e60021d5ed39/numba-0.61.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:764f0e47004f126f58c3b28e0a02374c420a9d15157b90806d68590f5c20cc89", size = 3580239 }, + { url = "https://files.pythonhosted.org/packages/61/08/71247ce560d2c222d9ca705c7d3547fc4069b96fc85d71aabeb890befe9f/numba-0.61.0-cp313-cp313-win_amd64.whl", hash = "sha256:074cd38c5b1f9c65a4319d1f3928165f48975ef0537ad43385b2bd908e6e2e35", size = 2828035 }, ] [[package]] @@ -4021,7 +4060,7 @@ wheels = [ [[package]] name = "transformers" -version = "4.49.0" +version = "4.51.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -4035,9 +4074,9 @@ dependencies = [ { name = "tokenizers" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/79/50/46573150944f46df8ec968eda854023165a84470b42f69f67c7d475dabc5/transformers-4.49.0.tar.gz", hash = "sha256:7e40e640b5b8dc3f48743f5f5adbdce3660c82baafbd3afdfc04143cdbd2089e", size = 8610952 } +sdist = { url = "https://files.pythonhosted.org/packages/f1/11/7414d5bc07690002ce4d7553602107bf969af85144bbd02830f9fb471236/transformers-4.51.3.tar.gz", hash = "sha256:e292fcab3990c6defe6328f0f7d2004283ca81a7a07b2de9a46d67fd81ea1409", size = 8941266 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/20/37/1f29af63e9c30156a3ed6ebc2754077016577c094f31de7b2631e5d379eb/transformers-4.49.0-py3-none-any.whl", hash = "sha256:6b4fded1c5fee04d384b1014495b4235a2b53c87503d7d592423c06128cbbe03", size = 9970275 }, + { url = "https://files.pythonhosted.org/packages/a9/b6/5257d04ae327b44db31f15cce39e6020cc986333c715660b1315a9724d82/transformers-4.51.3-py3-none-any.whl", hash = "sha256:fd3279633ceb2b777013234bbf0b4f5c2d23c4626b05497691f00cfda55e8a83", size = 10383940 }, ] [[package]] @@ -4166,7 +4205,7 @@ wheels = [ [[package]] name = "vllm" -version = "0.8.2" +version = "0.8.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -4179,6 +4218,7 @@ dependencies = [ { name = "fastapi", extra = ["standard"] }, { name = "filelock" }, { name = "gguf" }, + { name = "huggingface-hub", extra = ["hf-xet"] }, { name = "importlib-metadata" }, { name = "lark" }, { name = "llguidance", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, @@ -4189,6 +4229,7 @@ dependencies = [ { name = "numba" }, { name = "numpy" }, { name = "openai" }, + { name = "opencv-python-headless" }, { name = "outlines" }, { name = "partial-json-parser" }, { name = "pillow" }, @@ -4219,9 +4260,9 @@ dependencies = [ { name = "xformers", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "xgrammar", marker = "platform_machine == 'aarch64' or platform_machine == 'x86_64'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/df/4d/6b27cc14d0c35e578a743a767953500a801ba296694b7e44cca709738b41/vllm-0.8.2.tar.gz", hash = "sha256:9b337b1c4072ccb94b1bf2b716593fadbe2dcb8d091f9bcbd6b5c6d37f9842ac", size = 6450146 } +sdist = { url = "https://files.pythonhosted.org/packages/62/ef/238efdf161d527e7872f1792f731fbddcc17ad6362dd43b23dd6c91add1c/vllm-0.8.3.tar.gz", hash = "sha256:475a39d1093b8ef8a905d63eafe0c6c9b8f4f4c2ae2d23f1f3d0fae5e37bb4bd", size = 6618606 } 
wheels = [ - { url = "https://files.pythonhosted.org/packages/57/49/207364110b96d76139a4e80617e5831d46884abe824941b15c8a748ca5e0/vllm-0.8.2-cp38-abi3-manylinux1_x86_64.whl", hash = "sha256:32442b686c5dad8e6ddcf5a8b0cf3f741359fed6a9e9e940009f1daf80ae15de", size = 293643693 }, + { url = "https://files.pythonhosted.org/packages/2a/99/58ba40e42ec6358ff4da5b6b6ce2ac9f8b10329fcfd65c9ee12c124f37f9/vllm-0.8.3-cp38-abi3-manylinux1_x86_64.whl", hash = "sha256:5488af1cf912ca8a7fad622512e0502235f5377ee36571c04361cbc31105c811", size = 294034759 }, ] [[package]] @@ -4474,9 +4515,10 @@ wheels = [ [[package]] name = "xgrammar" -version = "0.1.16" +version = "0.1.17" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "nanobind" }, { name = "ninja" }, { name = "pydantic" }, { name = "sentencepiece" }, @@ -4485,26 +4527,26 @@ dependencies = [ { name = "transformers" }, { name = "triton", marker = "platform_machine == 'x86_64' and platform_system == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b8/68/993f4ede8a65c35c242bf70af1f1acee1e27a38649b38c6e9796280a9831/xgrammar-0.1.16.tar.gz", hash = "sha256:4ddd5128a82d0a9c800c03df25c610368ca630704ad20a6bb7a3629f24ced442", size = 1675541 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/81/3b/11c6fc8fd95469bd029bac4c88627ce4226f6f9cdba83ed672ce991da6c2/xgrammar-0.1.16-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:027c6937748d22b1c2db2850c99e3cdca6b9532817ad2013b6fb646f07cc8448", size = 380066 }, - { url = "https://files.pythonhosted.org/packages/5b/7e/e80e1e4c19a73dbe7e762309fd1bfd874c075f4a05336860269ddbe424fb/xgrammar-0.1.16-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ae561d74bcfacfe96970e3ec847cdeeda7fe2cb3ad38ff44ad370de75cef5615", size = 350211 }, - { url = "https://files.pythonhosted.org/packages/ee/f7/6d4e67d19e42f3a45323241fea030129e74da250faaf7c7efd9a09f216e9/xgrammar-0.1.16-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:46e52514479056418495d68413c2ea18798b95dcdc36d25f48b281ca7d203ce1", size = 4743864 }, - { url = "https://files.pythonhosted.org/packages/14/a6/8d7171595da3345768a1222e59e43def72f6d78dd2510dcd68d4aec6f185/xgrammar-0.1.16-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d898e3dc04ea7d81a0e9cd10b632c22707fcc9ce02d7be3c0aa6c38067af97f", size = 4808172 }, - { url = "https://files.pythonhosted.org/packages/67/94/f526dd17eb2c1fc08d01d6ae85de6198147ab8d80745a540a8c9c9f9f309/xgrammar-0.1.16-cp310-cp310-win_amd64.whl", hash = "sha256:04e361b22926f431ae82fad3c4463e0d3c8f653fe15ebe3d7059edf73e348565", size = 442688 }, - { url = "https://files.pythonhosted.org/packages/fe/b2/b4aafc0487cde77dbae781aefa3fc449193ca30f04a37e2ea9fd0a8ebf8f/xgrammar-0.1.16-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:23d016b09b22ad77a0cc7de49e2a7152d8cd221734aa6d41b5fd7827dfb1a4d3", size = 381666 }, - { url = "https://files.pythonhosted.org/packages/45/55/3416e235a07a97e32fc0b678266e605e61a7f52219570ad9e78618dd47b3/xgrammar-0.1.16-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd151867c7007c1af27c901d3fd9dd178e41468775b782e083d0d125228a915f", size = 351692 }, - { url = "https://files.pythonhosted.org/packages/8b/69/6d6eb9ec2ec521665102881c5caaaccd0b6f44eeaeeb9397078270d9bb1d/xgrammar-0.1.16-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54a3d4386b538fe0a6b6399de2592dd57756e31c1def812cf9653b8f91f827d8", size = 4751115 }, - { url = "https://files.pythonhosted.org/packages/c6/25/4dd662eadee7200dd22a97bac8dfa48a1cc2712714785bf2e1b12d7567c7/xgrammar-0.1.16-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab1850ffb1615c1370e4ba3d4dafb2c116a03a06683b9fcf309982c49b8c2f87", size = 4815162 }, - { url = "https://files.pythonhosted.org/packages/5f/89/68af4b94cf8e3fd6f11ca107f4e9c782053dd593d3dab5896ca4ffe5455f/xgrammar-0.1.16-cp311-cp311-win_amd64.whl", hash = 
"sha256:eb381bc5a1b8f17477700447a6cc676f22e91cc54a96f45dabe803f7fb0aec4d", size = 443920 }, - { url = "https://files.pythonhosted.org/packages/fd/ce/605628aa8eb99ac8ba3df32fc39ad598e8e9bd9ab6d6546dc4f6fde6f6f6/xgrammar-0.1.16-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:60967ad8435448c183ad911c9c5252e5cb0b032b37f86dcfc16cdd07c35954f6", size = 382751 }, - { url = "https://files.pythonhosted.org/packages/dd/7d/0b04a7a75fe3e5a8cdff905c130d776286723f2ea7be240cd205a7916814/xgrammar-0.1.16-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:90fae6c9256753f9816aacddf8c37176eded8b4164024d28d6342ea4b9182ae9", size = 351730 }, - { url = "https://files.pythonhosted.org/packages/15/b1/b619f6df882f2b4b2df2072543590e0e5fbf4abe80876ff8308612bd5758/xgrammar-0.1.16-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d75e6501f55368462b4d61ce0fb6a65c587782faa7319f48f49a8c444b4245f", size = 4727149 }, - { url = "https://files.pythonhosted.org/packages/f0/4b/94c5801b458d0840c906944a376c50ea3128e98e7819421e246a47d7dd2d/xgrammar-0.1.16-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51565f8e6eb17fefe7ce90aa4598cf8216b4ee801a33d58d8439242d3d18cfa6", size = 4796416 }, - { url = "https://files.pythonhosted.org/packages/e5/fd/7db507fb605692d64d0f341679e3300cefb64c67f7a6cc8274c7de43d9e5/xgrammar-0.1.16-cp312-cp312-win_amd64.whl", hash = "sha256:97322341c29185b31482459325160dc2fb3eeb99bdf52cfeb57ae61a7e76c9d1", size = 443953 }, - { url = "https://files.pythonhosted.org/packages/f8/3d/a798f138d5c60eb787cefb1f3739996fdb42dabbde6a94c2f606c8631a56/xgrammar-0.1.16-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:854e2b23d0099c590cbc8bb83ab7de7d7ba3acb8aab65d64fa1436af0639f80c", size = 351818 }, - { url = "https://files.pythonhosted.org/packages/1b/c3/74710d142d716c74bdaeaa4a17d2e90e8eb58d1ed525b49d2a49448b385d/xgrammar-0.1.16-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d3c4fbc944bc2c0529da3efe0c5accab20df6c99aef7adfd17e3d0fecd10a80a", size = 4793630 }, - { url = "https://files.pythonhosted.org/packages/ae/63/ea6bd4c3e367b473ba4c8269a70cf723ae2b9b0aadce360b07922a8451dc/xgrammar-0.1.16-cp313-cp313-win_amd64.whl", hash = "sha256:2301413a374f11add07843dfb49050f27beae89a4be7e0ffd454c08cf302412c", size = 443983 }, +sdist = { url = "https://files.pythonhosted.org/packages/e6/f9/6d530ce703cf5aae65d594a5ab984b9c0c4956e6fdbcc3279e8b1eaa358e/xgrammar-0.1.17.tar.gz", hash = "sha256:8f6cd7b3436482ad8c94b6cc93892a7f36381315c443e8e7f256f8d71c3efdee", size = 1679977 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/ca/61c54819ba1b00c5c189d6bd24e4f9b4ab6d334f18b339fd21397b1ccc11/xgrammar-0.1.17-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:829ab14ab2dee067955a3e55639f5f2c2ca4c5a4a6cb60a24b6655bf995f50e4", size = 372103 }, + { url = "https://files.pythonhosted.org/packages/14/18/b34ab691f65389b9939c49ac1188517194c3dadfa3a6ac3f5627226789bc/xgrammar-0.1.17-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cee7985c536d0648e774846ed7e59fd4bea0bcc03b1654d04e723000954308e4", size = 341599 }, + { url = "https://files.pythonhosted.org/packages/53/38/f805fd4eaafd78fac029bd14bf3ac243854c2afccc71c34c6942e6be5439/xgrammar-0.1.17-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c80b26ee041a49a7a0d20c05cf09c05937713c4c2c2d04a24b85ae76ee23d9b", size = 4234957 }, + { url = "https://files.pythonhosted.org/packages/58/20/21b5e35d20b6889a403f610aefb1306798c13de0c8d76c7a8bdff5608000/xgrammar-0.1.17-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ba897085b2d3dc8f9ffadfc66224e8031f05db91c142a7e7a0be984306a7fc1", size = 4308431 }, + { url = "https://files.pythonhosted.org/packages/8b/90/004b58a55fdb782f98ed27e591786e78475ead9fb25774dab0a101df5a5a/xgrammar-0.1.17-cp310-cp310-win_amd64.whl", hash = "sha256:d1dc8e880f01ec8f22414542af304446c764c00667aae98e10053d4fc14d1f57", size = 
422436 }, + { url = "https://files.pythonhosted.org/packages/53/bd/0abe8e01a3390feb60e9e1799f91b0c2a873c2ff1fa87052c18492b3b71b/xgrammar-0.1.17-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:cfd95f0a8dc7f025921d93fed9c78b3b0dfb28e89b3e9e37c393470ca57352e0", size = 371921 }, + { url = "https://files.pythonhosted.org/packages/96/ee/71fe485df88d111c26e265000f19b4521abf5660278f283ebed671977261/xgrammar-0.1.17-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:98cfd1efe13e446a5d96741202db375a8c807630c95624976889e6831e94c675", size = 341466 }, + { url = "https://files.pythonhosted.org/packages/91/6e/2592870e0a2c061ac7ea5607e82ed5f30daa05dee1896297b4f19e77e9bd/xgrammar-0.1.17-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:873d13f863561ac902f938da63201d81a1f6424365c7f89fb15910a7147b3ec0", size = 4236127 }, + { url = "https://files.pythonhosted.org/packages/f1/05/a31e2f04b0cb510f867da3094b35dc893622debbe1254e02accf6683c7aa/xgrammar-0.1.17-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87940387b4850b4e5e1f68888f9ce1e7236f94dbbf1ba3ebcd08a9a5cab0d66d", size = 4309348 }, + { url = "https://files.pythonhosted.org/packages/c5/3a/1afa276678a9e050323e9ab3013e0ca25df02ff24ced496c8ccec93749bd/xgrammar-0.1.17-cp311-cp311-win_amd64.whl", hash = "sha256:3505efb81a6a2b59b843b99c6c0bc09dc0d924307c18c0de693a919fe10066d6", size = 422201 }, + { url = "https://files.pythonhosted.org/packages/c7/32/deaee8f04d24bc2ed38c14fb01d6faa2319fb361353bbbebac4bdf801ac6/xgrammar-0.1.17-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:cc8e1e4a3298aae9856416e1366ccd07d4c6b5556921ecd108c579b1184522d2", size = 371412 }, + { url = "https://files.pythonhosted.org/packages/35/ed/59a89ef003235f746fa989bf82e8425e6b046d65349feacd1b57b4763141/xgrammar-0.1.17-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a7712942793727f0c490f6f2388d5995632cc0c8258a7aff33577ff0f47bc513", size = 340973 }, + { url = 
"https://files.pythonhosted.org/packages/48/bc/f6f5f16d9cb57684f23a62d3f51deed410da6c9708bf3d5eb679dd867dc0/xgrammar-0.1.17-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b3e998ab30662b5f090978d04928f20467df973116c17624f868fa7717ff683", size = 4236280 }, + { url = "https://files.pythonhosted.org/packages/8a/89/8d4b7a8bf5af80564081555f1734d668e5496e90171280de9153d0696065/xgrammar-0.1.17-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1687ce767c5ca0fe101f699c2691762a037a6b0159608f6c4a720bccdb57ee8a", size = 4310624 }, + { url = "https://files.pythonhosted.org/packages/27/37/8e31a5a44b21e89755795103df04fadb390db395c9fe65179acc9bf067b4/xgrammar-0.1.17-cp312-cp312-win_amd64.whl", hash = "sha256:9572b4c571cf39f6ffd29915b73d3cc13303c72aa86043660f46f66746b5b947", size = 421404 }, + { url = "https://files.pythonhosted.org/packages/62/22/c0eab43801aba25046b3ea74cd3575560086c56a78f4be13033c76735c22/xgrammar-0.1.17-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:668171673af6244108e3ec6317bca592e627be3a57d4c250bd1ce78a23d4d127", size = 340909 }, + { url = "https://files.pythonhosted.org/packages/b5/07/787c48716e9dddbc4beea6c22a5e25f952d6680937788065dec0354b7d74/xgrammar-0.1.17-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dbe59d8b3bc44ec092914cda20728b69a73b2979596f2f0a7b868aaecd234b0", size = 4309322 }, + { url = "https://files.pythonhosted.org/packages/86/2e/5677e586427b9d32715d5ef672429f5e111d7531bc289b96945e95041c3d/xgrammar-0.1.17-cp313-cp313-win_amd64.whl", hash = "sha256:fd2f044eec970db462932fd736330bb76060d41fa6cc23e000f486b53fbdcf34", size = 421329 }, ] [[package]] From d0a0c0650874a8ad17de083fa10e3badbfdec4b7 Mon Sep 17 00:00:00 2001 From: Alex Qiu Date: Tue, 15 Apr 2025 16:10:41 +0800 Subject: [PATCH 04/13] use tags to separately wakeup vllm to reduce refitting peak memory Signed-off-by: Alex Qiu --- nemo_reinforcer/algorithms/grpo.py | 3 ++- 
nemo_reinforcer/models/generation/vllm.py | 10 +++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/nemo_reinforcer/algorithms/grpo.py b/nemo_reinforcer/algorithms/grpo.py index 6d032a4a96..53f8d249aa 100644 --- a/nemo_reinforcer/algorithms/grpo.py +++ b/nemo_reinforcer/algorithms/grpo.py @@ -277,13 +277,14 @@ def refit_policy_generation( ): """Refit the policy generation interface with the latest policy weights.""" policy.offload_before_refit() - policy_generation.prepare_for_generation() + policy_generation.prepare_for_generation(tags=["weights"]) # Streaming update weights to save memory param_keys = policy.prepare_weights_for_ipc() for key in param_keys: ipc_handles = policy.get_weights_ipc_handles(key) policy_generation.update_weights(ipc_handles) policy.offload_after_refit() + policy_generation.prepare_for_generation(tags=["kv_cache"]) def generate_responses( diff --git a/nemo_reinforcer/models/generation/vllm.py b/nemo_reinforcer/models/generation/vllm.py index ada0bf2623..0f1d13d1ae 100644 --- a/nemo_reinforcer/models/generation/vllm.py +++ b/nemo_reinforcer/models/generation/vllm.py @@ -424,8 +424,12 @@ def sleep(self): gc.collect() torch.cuda.empty_cache() - def wake_up(self): - self.llm.wake_up() + def wake_up(self, **kwargs): + # tags like ["weights", "kv_cache"] + if "tags" in kwargs: + self.llm.wake_up(tags=kwargs["tags"]) + else: + self.llm.wake_up() class VllmGeneration(GenerationInterface): @@ -594,7 +598,7 @@ def prepare_for_generation(self, *args, **kwargs): try: # Use run_all_workers_single_data for methods that don't need data futures = self.worker_group.run_all_workers_single_data( - "wake_up", only_on="tied_leader" + "wake_up", only_on="tied_leader", **kwargs ) # Wait for all futures to complete results = ray.get(futures) From a43335bf1793b79fc942237e11e7cb3b9ae5b8d2 Mon Sep 17 00:00:00 2001 From: Yuki Huang Date: Fri, 18 Apr 2025 10:15:42 +0000 Subject: [PATCH 05/13] fix unit test Signed-off-by: Yuki Huang --- 
tests/unit/models/generation/test_vllm_generation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 8d17ef11fa..60338775c0 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -748,7 +748,8 @@ def test_vllm_weight_update_and_prefix_cache_reset( torch.cuda.empty_cache() -def test_vllm_weight_update_memory(cluster, tokenizer): +@pytest.mark.parametrize("enable_dtensor", [True, False]) +def test_vllm_weight_update_memory(cluster, tokenizer, enable_dtensor): """Test that vLLM streaming weight update and can save memory.""" from nemo_reinforcer.models.policy.hf_policy import HfPolicy @@ -770,7 +771,7 @@ def test_vllm_weight_update_memory(cluster, tokenizer): vllm_policy.finish_generation() print("Creating HF policy...") - hf_config = basic_hf_test_config.copy() + hf_config = get_basic_hf_test_config(enable_dtensor=enable_dtensor) hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") From 6abd3f79600ef45affb872b926a893732c58b5ec Mon Sep 17 00:00:00 2001 From: Yuki Huang Date: Fri, 18 Apr 2025 11:13:52 +0000 Subject: [PATCH 06/13] update unit test threshold Signed-off-by: Yuki Huang --- tests/unit/models/generation/test_vllm_generation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 60338775c0..72ea3f9127 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -799,8 +799,12 @@ def test_vllm_weight_update_memory(cluster, tokenizer, enable_dtensor): assert current_reserved == 0.0, "Memory should be 0 after refit completed" # memory threshold: memory during non-streaming weight update on 1B model on 2 GPUs # memory during 
streaming weight update should less than this baseline threshold - assert peak_allocated < 11286, "Peak allocated memory should be less than 11286MB" - assert peak_reserved < 11298, "Peak reserved memory should be less than 11298MB" + if enable_dtensor: + assert peak_allocated < 8074, "Peak allocated memory should < 8074 MB" + assert peak_reserved < 8088, "Peak reserved memory should < 8088 MB" + else: + assert peak_allocated < 11286, "Peak allocated memory should < 11286 MB" + assert peak_reserved < 11298, "Peak reserved memory should < 11298 MB" # Clean up vllm_policy.shutdown() From dd2bff4877d301d7be2e9f23b314c7a140b93d6c Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Mon, 21 Apr 2025 16:40:25 -0700 Subject: [PATCH 07/13] Use vllm 0.8.4 Signed-off-by: Parth Chadha --- pyproject.toml | 2 +- uv.lock | 283 +++++++++++++++++++++++++++++++++++-------------- 2 files changed, 207 insertions(+), 78 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4b3e064a4e..83c8f86ab7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ readme = {file = "README.md", content-type = "text/markdown"} [project.optional-dependencies] vllm = [ - "vllm==0.8.3", + "vllm==0.8.4", ] [dependency-groups] diff --git a/uv.lock b/uv.lock index c5289a2bed..9902633207 100644 --- a/uv.lock +++ b/uv.lock @@ -549,16 +549,16 @@ wheels = [ [[package]] name = "compressed-tensors" -version = "0.9.2" +version = "0.9.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "torch" }, { name = "transformers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cf/43/2b5ada16e9e70c62dc24e30ef3a9f22782ab4130128b52b6345ead8d0de3/compressed_tensors-0.9.2.tar.gz", hash = "sha256:18c5627a7324a75cd4c7d984799269e0ddef592b6fb3b9a81c16754d5c4b56ff", size = 65839 } +sdist = { url = "https://files.pythonhosted.org/packages/91/3e/f74c5dcca6552e15a00df4a78c6e4a8776a7c901acc5a8c1dd371698ef54/compressed_tensors-0.9.3.tar.gz", hash = 
"sha256:5bdc7774a6c217496cba7d6a4fca6ffac943e68adae0481ead6d036660c1b340", size = 66354 } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/6e/dc0a80ce14802344e3f4d0520285e8773b83ec2fd864e7cab886718f55a9/compressed_tensors-0.9.2-py3-none-any.whl", hash = "sha256:fbc5d188ee43f93eccd6df566e8eccbb1eba907560b2b81ca85153335df55dd9", size = 97875 }, + { url = "https://files.pythonhosted.org/packages/79/87/9c7eb4b57f89a51a65bee166cc079cd1bc1b398823da4f3b3c12f1021af8/compressed_tensors-0.9.3-py3-none-any.whl", hash = "sha256:5fcc3e4e7aa828036c2aeb130a610f9745a2e4890692cad6f6b5a2f960b21cc1", size = 98449 }, ] [[package]] @@ -674,6 +674,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/44/5de560a2625d31801895fb2663693df210c6465960d61a99192caa9afd63/datasets-3.4.1-py3-none-any.whl", hash = "sha256:b91cf257bd64132fa9d953dd4768ab6d63205597301f132a74271cfcce8b5dd3", size = 487392 }, ] +[[package]] +name = "deprecated" +version = "1.2.18" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998 }, +] + [[package]] name = "depyf" version = "0.18.0" @@ -953,16 +965,17 @@ http = [ [[package]] name = "gguf" -version = "0.10.0" +version = "0.16.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, { name = "pyyaml" }, + { name = "sentencepiece" }, { name = "tqdm" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/0e/c4/a159e9f842b0e8b8495b2689af6cf3426f002cf01207ca8134db82fc4088/gguf-0.10.0.tar.gz", hash = "sha256:52a30ef26328b419ffc47d9269fc580c238edf1c8a19b5ea143c323e04a038c1", size = 65704 } +sdist = { url = "https://files.pythonhosted.org/packages/c8/56/9c34a40ef5ad96e02cfe49958cf884496f145d101605551663753ae1657c/gguf-0.16.2.tar.gz", hash = "sha256:0fc956289a30d0f1f3afd75ec0d493f73ae2629a3f21f3846dd1687d8791c7c1", size = 85129 } wheels = [ - { url = "https://files.pythonhosted.org/packages/1b/e4/c5f9bd71840ae9afb7e2b7c285ba209f2ef5e9cd83885f8c596c551d3026/gguf-0.10.0-py3-none-any.whl", hash = "sha256:706089fba756a06913227841b4a6c8398360fa991569fd974e663a92b224e33f", size = 71584 }, + { url = "https://files.pythonhosted.org/packages/15/18/89697e4996920aa1e60f0061d0bb110f738a5ba3de12ed74309f51a10a0a/gguf-0.16.2-py3-none-any.whl", hash = "sha256:e73eb19b30fcc7c7f32894345024dda8b1a0c959b94a12b7c40ded8dd3f96810", size = 92154 }, ] [[package]] @@ -1233,14 +1246,14 @@ wheels = [ [[package]] name = "importlib-metadata" -version = "8.6.1" +version = "8.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "zipp" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/33/08/c1395a292bb23fd03bdf572a1357c5a733d3eecbab877641ceacab23db6e/importlib_metadata-8.6.1.tar.gz", hash = "sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580", size = 55767 } +sdist = { url = "https://files.pythonhosted.org/packages/20/ff/bd28f70283b9cca0cbf0c2a6082acbecd822d1962ae7b2a904861b9965f8/importlib_metadata-8.0.0.tar.gz", hash = "sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812", size = 52667 } wheels = [ - { url = "https://files.pythonhosted.org/packages/79/9d/0fb148dc4d6fa4a7dd1d8378168d9b4cd8d4560a6fbf6f0121c5fc34eb68/importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e", size = 26971 }, + { url = 
"https://files.pythonhosted.org/packages/dc/ef/38766b2edb096260d9b1b6ad35adaa0bce3b0567abb452b21eb074af88c4/importlib_metadata-8.0.0-py3-none-any.whl", hash = "sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f", size = 24769 }, ] [[package]] @@ -1793,15 +1806,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d", size = 84579 }, ] -[[package]] -name = "nanobind" -version = "2.7.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/7d/f77f2bc2e2a210502a164556f8a742cd0f72f39061b97cb9d73bbd3ff0ab/nanobind-2.7.0.tar.gz", hash = "sha256:f9f1b160580c50dcf37b6495a0fd5ec61dc0d95dae5f8004f87dd9ad7eb46b34", size = 976093 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/96/14/989883082b395146120d34ca7e484a2b24cb73b0e428576a3a4249bd4082/nanobind-2.7.0-py3-none-any.whl", hash = "sha256:73b12d0e751d140d6c1bf4b215e18818a8debfdb374f08dc3776ad208d808e74", size = 241690 }, -] - [[package]] name = "nemo-reinforcer" source = { editable = "." 
} @@ -1870,7 +1874,7 @@ requires-dist = [ { name = "torch", specifier = "==2.6.0" }, { name = "torchdata" }, { name = "transformers" }, - { name = "vllm", marker = "extra == 'vllm'", specifier = "==0.8.3" }, + { name = "vllm", marker = "extra == 'vllm'", specifier = "==0.8.4" }, { name = "wandb" }, ] provides-extras = ["vllm"] @@ -1953,34 +1957,34 @@ wheels = [ [[package]] name = "numba" -version = "0.61.0" +version = "0.61.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "llvmlite" }, { name = "numpy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3c/88/c13a935f200fda51384411e49840a8e7f70c9cb1ee8d809dd0f2477cf7ef/numba-0.61.0.tar.gz", hash = "sha256:888d2e89b8160899e19591467e8fdd4970e07606e1fbc248f239c89818d5f925", size = 2816484 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/77/97/8568a025b9ab8b4d53491e70d4206d5f3fc71fbe94f3097058e01ad8e7ff/numba-0.61.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9cab9783a700fa428b1a54d65295122bc03b3de1d01fb819a6b9dbbddfdb8c43", size = 2769008 }, - { url = "https://files.pythonhosted.org/packages/8c/ab/a88c20755f66543ee01c85c98b866595b92e1bd0ed80565a4889e22929a8/numba-0.61.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:46c5ae094fb3706f5adf9021bfb7fc11e44818d61afee695cdee4eadfed45e98", size = 2771815 }, - { url = "https://files.pythonhosted.org/packages/ae/f4/b357913089ecec1a9ddc6adc04090396928f36a484a5ab9e71b24ddba4cd/numba-0.61.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6fb74e81aa78a2303e30593d8331327dfc0d2522b5db05ac967556a26db3ef87", size = 3820233 }, - { url = "https://files.pythonhosted.org/packages/ea/60/0e21bcf3baaf10e39d48cd224618e46a6b75d3394f465c37ce57bf98cbfa/numba-0.61.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0ebbd4827091384ab8c4615ba1b3ca8bc639a3a000157d9c37ba85d34cd0da1b", size = 3514707 }, - { url = 
"https://files.pythonhosted.org/packages/a0/08/45c136ab59e6b11e61ce15a0d17ef03fd89eaccb0db05ad67912aaf5218a/numba-0.61.0-cp310-cp310-win_amd64.whl", hash = "sha256:43aa4d7d10c542d3c78106b8481e0cbaaec788c39ee8e3d7901682748ffdf0b4", size = 2827753 }, - { url = "https://files.pythonhosted.org/packages/63/8f/f983a7c859ccad73d3cc3f86fbba94f16e137cd1ee464631d61b624363b2/numba-0.61.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:bf64c2d0f3d161af603de3825172fb83c2600bcb1d53ae8ea568d4c53ba6ac08", size = 2768960 }, - { url = "https://files.pythonhosted.org/packages/be/1b/c33dc847d475d5b647b4ad5aefc38df7a72283763f4cda47745050375a81/numba-0.61.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:de5aa7904741425f28e1028b85850b31f0a245e9eb4f7c38507fb893283a066c", size = 2771862 }, - { url = "https://files.pythonhosted.org/packages/14/91/18b9f64b34ff318a14d072251480547f89ebfb864b2b7168e5dc5f64f502/numba-0.61.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21c2fe25019267a608e2710a6a947f557486b4b0478b02e45a81cf606a05a7d4", size = 3825411 }, - { url = "https://files.pythonhosted.org/packages/f2/97/1a38030c2a331e273ace1de2b61988e33d80878fda8a5eedee0cd78399d3/numba-0.61.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:74250b26ed6a1428763e774dc5b2d4e70d93f73795635b5412b8346a4d054574", size = 3519604 }, - { url = "https://files.pythonhosted.org/packages/df/a7/56f547de8fc197963f238fd62beb5f1d2cace047602d0577956bf6840970/numba-0.61.0-cp311-cp311-win_amd64.whl", hash = "sha256:b72bbc8708e98b3741ad0c63f9929c47b623cc4ee86e17030a4f3e301e8401ac", size = 2827642 }, - { url = "https://files.pythonhosted.org/packages/63/c9/c61881e7f2e253e745209f078bbd428ce23b6cf901f7d93afe166720ff95/numba-0.61.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:152146ecdbb8d8176f294e9f755411e6f270103a11c3ff50cecc413f794e52c8", size = 2769758 }, - { url = 
"https://files.pythonhosted.org/packages/e1/28/ddec0147a4933f86ceaca580aa9bb767d5632ecdb1ece6cfb3eab4ac78e5/numba-0.61.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5cafa6095716fcb081618c28a8d27bf7c001e09696f595b41836dec114be2905", size = 2772445 }, - { url = "https://files.pythonhosted.org/packages/18/74/6a9f0e6c76c088f8a6aa702eab31734068061dca5cc0f34e8bc1eb447de1/numba-0.61.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ffe9fe373ed30638d6e20a0269f817b2c75d447141f55a675bfcf2d1fe2e87fb", size = 3882115 }, - { url = "https://files.pythonhosted.org/packages/53/68/d7c31e53f08e6b4669c9b5a3cd7c5fb9097220c5ef388bc099ca8ab9749f/numba-0.61.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9f25f7fef0206d55c1cfb796ad833cbbc044e2884751e56e798351280038484c", size = 3573296 }, - { url = "https://files.pythonhosted.org/packages/94/4f/8357a99a14f331b865a42cb4756ae37da85599b9c95e01277ea10361e91a/numba-0.61.0-cp312-cp312-win_amd64.whl", hash = "sha256:550d389573bc3b895e1ccb18289feea11d937011de4d278b09dc7ed585d1cdcb", size = 2828077 }, - { url = "https://files.pythonhosted.org/packages/3b/54/71fba18e4af5619f1ea8175ee92e82dd8e220bd6feb8c0153c6b814c8a60/numba-0.61.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:b96fafbdcf6f69b69855273e988696aae4974115a815f6818fef4af7afa1f6b8", size = 2768024 }, - { url = "https://files.pythonhosted.org/packages/39/76/2448b43d08e904aad1b1b9cd12835b19411e84a81aa9192f83642a5e0afd/numba-0.61.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f6c452dca1de8e60e593f7066df052dd8da09b243566ecd26d2b796e5d3087d", size = 2769541 }, - { url = "https://files.pythonhosted.org/packages/32/8f/4bb2374247ab988c9eac587b304b2947a36d605b9bb9ba4bf06e955c17d3/numba-0.61.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:44240e694d4aa321430c97b21453e46014fe6c7b8b7d932afa7f6a88cc5d7e5e", size = 3890102 }, - { url = 
"https://files.pythonhosted.org/packages/ab/bc/dc2d03555289ae5263f65c01d45eb186ce347585c191daf0e60021d5ed39/numba-0.61.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:764f0e47004f126f58c3b28e0a02374c420a9d15157b90806d68590f5c20cc89", size = 3580239 }, - { url = "https://files.pythonhosted.org/packages/61/08/71247ce560d2c222d9ca705c7d3547fc4069b96fc85d71aabeb890befe9f/numba-0.61.0-cp313-cp313-win_amd64.whl", hash = "sha256:074cd38c5b1f9c65a4319d1f3928165f48975ef0537ad43385b2bd908e6e2e35", size = 2828035 }, +sdist = { url = "https://files.pythonhosted.org/packages/1c/a0/e21f57604304aa03ebb8e098429222722ad99176a4f979d34af1d1ee80da/numba-0.61.2.tar.gz", hash = "sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d", size = 2820615 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/ca/f470be59552ccbf9531d2d383b67ae0b9b524d435fb4a0d229fef135116e/numba-0.61.2-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:cf9f9fc00d6eca0c23fc840817ce9f439b9f03c8f03d6246c0e7f0cb15b7162a", size = 2775663 }, + { url = "https://files.pythonhosted.org/packages/f5/13/3bdf52609c80d460a3b4acfb9fdb3817e392875c0d6270cf3fd9546f138b/numba-0.61.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ea0247617edcb5dd61f6106a56255baab031acc4257bddaeddb3a1003b4ca3fd", size = 2778344 }, + { url = "https://files.pythonhosted.org/packages/e2/7d/bfb2805bcfbd479f04f835241ecf28519f6e3609912e3a985aed45e21370/numba-0.61.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ae8c7a522c26215d5f62ebec436e3d341f7f590079245a2f1008dfd498cc1642", size = 3824054 }, + { url = "https://files.pythonhosted.org/packages/e3/27/797b2004745c92955470c73c82f0e300cf033c791f45bdecb4b33b12bdea/numba-0.61.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd1e74609855aa43661edffca37346e4e8462f6903889917e9f41db40907daa2", size = 3518531 }, + { url = 
"https://files.pythonhosted.org/packages/b1/c6/c2fb11e50482cb310afae87a997707f6c7d8a48967b9696271347441f650/numba-0.61.2-cp310-cp310-win_amd64.whl", hash = "sha256:ae45830b129c6137294093b269ef0a22998ccc27bf7cf096ab8dcf7bca8946f9", size = 2831612 }, + { url = "https://files.pythonhosted.org/packages/3f/97/c99d1056aed767503c228f7099dc11c402906b42a4757fec2819329abb98/numba-0.61.2-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:efd3db391df53aaa5cfbee189b6c910a5b471488749fd6606c3f33fc984c2ae2", size = 2775825 }, + { url = "https://files.pythonhosted.org/packages/95/9e/63c549f37136e892f006260c3e2613d09d5120672378191f2dc387ba65a2/numba-0.61.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:49c980e4171948ffebf6b9a2520ea81feed113c1f4890747ba7f59e74be84b1b", size = 2778695 }, + { url = "https://files.pythonhosted.org/packages/97/c8/8740616c8436c86c1b9a62e72cb891177d2c34c2d24ddcde4c390371bf4c/numba-0.61.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3945615cd73c2c7eba2a85ccc9c1730c21cd3958bfcf5a44302abae0fb07bb60", size = 3829227 }, + { url = "https://files.pythonhosted.org/packages/fc/06/66e99ae06507c31d15ff3ecd1f108f2f59e18b6e08662cd5f8a5853fbd18/numba-0.61.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbfdf4eca202cebade0b7d43896978e146f39398909a42941c9303f82f403a18", size = 3523422 }, + { url = "https://files.pythonhosted.org/packages/0f/a4/2b309a6a9f6d4d8cfba583401c7c2f9ff887adb5d54d8e2e130274c0973f/numba-0.61.2-cp311-cp311-win_amd64.whl", hash = "sha256:76bcec9f46259cedf888041b9886e257ae101c6268261b19fda8cfbc52bec9d1", size = 2831505 }, + { url = "https://files.pythonhosted.org/packages/b4/a0/c6b7b9c615cfa3b98c4c63f4316e3f6b3bbe2387740277006551784218cd/numba-0.61.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:34fba9406078bac7ab052efbf0d13939426c753ad72946baaa5bf9ae0ebb8dd2", size = 2776626 }, + { url = 
"https://files.pythonhosted.org/packages/92/4a/fe4e3c2ecad72d88f5f8cd04e7f7cff49e718398a2fac02d2947480a00ca/numba-0.61.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4ddce10009bc097b080fc96876d14c051cc0c7679e99de3e0af59014dab7dfe8", size = 2779287 }, + { url = "https://files.pythonhosted.org/packages/9a/2d/e518df036feab381c23a624dac47f8445ac55686ec7f11083655eb707da3/numba-0.61.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b1bb509d01f23d70325d3a5a0e237cbc9544dd50e50588bc581ba860c213546", size = 3885928 }, + { url = "https://files.pythonhosted.org/packages/10/0f/23cced68ead67b75d77cfcca3df4991d1855c897ee0ff3fe25a56ed82108/numba-0.61.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48a53a3de8f8793526cbe330f2a39fe9a6638efcbf11bd63f3d2f9757ae345cd", size = 3577115 }, + { url = "https://files.pythonhosted.org/packages/68/1d/ddb3e704c5a8fb90142bf9dc195c27db02a08a99f037395503bfbc1d14b3/numba-0.61.2-cp312-cp312-win_amd64.whl", hash = "sha256:97cf4f12c728cf77c9c1d7c23707e4d8fb4632b46275f8f3397de33e5877af18", size = 2831929 }, + { url = "https://files.pythonhosted.org/packages/0b/f3/0fe4c1b1f2569e8a18ad90c159298d862f96c3964392a20d74fc628aee44/numba-0.61.2-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:3a10a8fc9afac40b1eac55717cece1b8b1ac0b946f5065c89e00bde646b5b154", size = 2771785 }, + { url = "https://files.pythonhosted.org/packages/e9/71/91b277d712e46bd5059f8a5866862ed1116091a7cb03bd2704ba8ebe015f/numba-0.61.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d3bcada3c9afba3bed413fba45845f2fb9cd0d2b27dd58a1be90257e293d140", size = 2773289 }, + { url = "https://files.pythonhosted.org/packages/0d/e0/5ea04e7ad2c39288c0f0f9e8d47638ad70f28e275d092733b5817cf243c9/numba-0.61.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bdbca73ad81fa196bd53dc12e3aaf1564ae036e0c125f237c7644fe64a4928ab", size = 3893918 }, + { url = 
"https://files.pythonhosted.org/packages/17/58/064f4dcb7d7e9412f16ecf80ed753f92297e39f399c905389688cf950b81/numba-0.61.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f154aaea625fb32cfbe3b80c5456d514d416fcdf79733dd69c0df3a11348e9e", size = 3584056 }, + { url = "https://files.pythonhosted.org/packages/af/a4/6d3a0f2d3989e62a18749e1e9913d5fa4910bbb3e3311a035baea6caf26d/numba-0.61.2-cp313-cp313-win_amd64.whl", hash = "sha256:59321215e2e0ac5fa928a8020ab00b8e57cda8a97384963ac0dfa4d4e6aa54e7", size = 2831846 }, ] [[package]] @@ -2227,6 +2231,128 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/86/8a/69176a64335aed183529207ba8bc3d329c2999d852b4f3818027203f50e6/opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:6c304df9caa7a6a5710b91709dd4786bf20a74d57672b3c31f7033cc638174ca", size = 39402386 }, ] +[[package]] +name = "opentelemetry-api" +version = "1.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "importlib-metadata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/d4/e9a0ddef6eed086c96e8265d864a46da099611b7be153b0cfb63fd47e1b4/opentelemetry_api-1.26.0.tar.gz", hash = "sha256:2bd639e4bed5b18486fef0b5a520aaffde5a18fc225e808a1ac4df363f43a1ce", size = 60904 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/a7/6322d1d7a1fb926e8b99208c27730f21217da2f1e0e11dab48a78a0427a4/opentelemetry_api-1.26.0-py3-none-any.whl", hash = "sha256:7d7ea33adf2ceda2dd680b18b1677e4152000b37ca76e679da71ff103b943064", size = 61533 }, +] + +[[package]] +name = "opentelemetry-exporter-otlp" +version = "1.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-exporter-otlp-proto-grpc" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/be/99/80edf6286f9040fadf065f9a11869fda34449a61e62a5372cb84d5a6f53b/opentelemetry_exporter_otlp-1.26.0.tar.gz", 
hash = "sha256:cf0e093f080011951d9f97431a83869761e4d4ebe83a4195ee92d7806223299c", size = 6168 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/71/b9221af6af61213c522401b5f46a5eaa41d8dd7daeb0740dc5604f5c3980/opentelemetry_exporter_otlp-1.26.0-py3-none-any.whl", hash = "sha256:f839989f54bda85ee33c5dae033c44dcec9ccbb0dafc6a43d585df44da1d2036", size = 7001 }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-common" +version = "1.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-proto" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/84/cd/ed9eaa1d80facb6609d02af6c393b02ce3797a15742361be4859db6fdc17/opentelemetry_exporter_otlp_proto_common-1.26.0.tar.gz", hash = "sha256:bdbe50e2e22a1c71acaa0c8ba6efaadd58882e5a5978737a44a4c4b10d304c92", size = 17815 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/2f/0f7e0a73fd901c9abc6ea680d7f19a803dac830c450f21e1123d3a3ec488/opentelemetry_exporter_otlp_proto_common-1.26.0-py3-none-any.whl", hash = "sha256:ee4d8f8891a1b9c372abf8d109409e5b81947cf66423fd998e56880057afbc71", size = 17837 }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-grpc" +version = "1.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "googleapis-common-protos" }, + { name = "grpcio" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a0/23/cac89aca97ecb8f7498a875dc2ac89224b4f3345bcb8ffff643b59886196/opentelemetry_exporter_otlp_proto_grpc-1.26.0.tar.gz", hash = "sha256:a65b67a9a6b06ba1ec406114568e21afe88c1cdb29c464f2507d529eb906d8ae", size = 25239 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/4d/0c/e4473692fec8076008c7926dfcef7223fc6d2785f04ad9d8402347a4eba9/opentelemetry_exporter_otlp_proto_grpc-1.26.0-py3-none-any.whl", hash = "sha256:e2be5eff72ebcb010675b818e8d7c2e7d61ec451755b8de67a140bc49b9b0280", size = 18228 }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-http" +version = "1.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "googleapis-common-protos" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/d2/4e6e2066b87626966f99f8fc7fcb9414e7548779d751def7db54c9d25b1c/opentelemetry_exporter_otlp_proto_http-1.26.0.tar.gz", hash = "sha256:5801ebbcf7b527377883e6cbbdda35ee712dc55114fff1e93dfee210be56c908", size = 14451 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/d3/0b7217b61903249035d219fbe93a8558287f86aead340c7b2dc1226b8ad4/opentelemetry_exporter_otlp_proto_http-1.26.0-py3-none-any.whl", hash = "sha256:ee72a87c48ec977421b02f16c52ea8d884122470e0be573905237b540f4ee562", size = 16795 }, +] + +[[package]] +name = "opentelemetry-proto" +version = "1.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/06/9505ef04e527fa711ebffb47f3f56cac6015405953ff688fc349d170fb9c/opentelemetry_proto-1.26.0.tar.gz", hash = "sha256:c5c18796c0cab3751fc3b98dee53855835e90c0422924b484432ac852d93dc1e", size = 34749 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/f4/66a3892eea913cded9bac0fdd3fb1a412fa2da8eb50014ec87a52648444a/opentelemetry_proto-1.26.0-py3-none-any.whl", hash = "sha256:6c4d7b4d4d9c88543bcf8c28ae3f8f0448a753dc291c18c5390444c90b76a725", size = 52466 }, +] + +[[package]] +name = "opentelemetry-sdk" 
+version = "1.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d3/85/8ca0d5ebfe708287b091dffcd15553b74bbfe4532f8dd42662b78b2e0cab/opentelemetry_sdk-1.26.0.tar.gz", hash = "sha256:c90d2868f8805619535c05562d699e2f4fb1f00dbd55a86dcefca4da6fa02f85", size = 143139 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/f1/a9b550d0f9c049653dd2eab45cecf8fe4baa9795ed143d87834056ffabaf/opentelemetry_sdk-1.26.0-py3-none-any.whl", hash = "sha256:feb5056a84a88670c041ea0ded9921fca559efec03905dddeb3885525e0af897", size = 109475 }, +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.47b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "opentelemetry-api" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/93/85/edef14d10ad00ddd9fffb20e4d3d938f4c5c1247e11a175066fe2b4a72f8/opentelemetry_semantic_conventions-0.47b0.tar.gz", hash = "sha256:a8d57999bbe3495ffd4d510de26a97dadc1dace53e0275001b2c1b2f67992a7e", size = 83994 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/c2/ca5cef8e4cd8eec5a95deed95ec3f6005e499fd9d17ca08731ced03a6921/opentelemetry_semantic_conventions-0.47b0-py3-none-any.whl", hash = "sha256:4ff9d595b85a59c1c1413f02bba320ce7ea6bf9e2ead2b0913c4395c7bbc1063", size = 138027 }, +] + +[[package]] +name = "opentelemetry-semantic-conventions-ai" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2b/8f/7fb173fd1928398b81d0952f7a9f30381ce3215817e3ac6e92f180434874/opentelemetry_semantic_conventions_ai-0.4.3.tar.gz", hash = "sha256:761a68a7e99436dfc53cfe1f99507316aa0114ac480f0c42743b9320b7c94831", size = 4540 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/95/56/b178de82b650526ff5d5e67037786008ea0acd043051d535c483dabd3cc4/opentelemetry_semantic_conventions_ai-0.4.3-py3-none-any.whl", hash = "sha256:9ff60bbf38c8a891c20a355b4ca1948380361e27412c3ead264de0d050fa2570", size = 5384 }, +] + [[package]] name = "outlines" version = "0.1.11" @@ -2577,16 +2703,16 @@ wheels = [ [[package]] name = "protobuf" -version = "5.29.4" +version = "4.25.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/17/7d/b9dca7365f0e2c4fa7c193ff795427cfa6290147e5185ab11ece280a18e7/protobuf-5.29.4.tar.gz", hash = "sha256:4f1dfcd7997b31ef8f53ec82781ff434a28bf71d9102ddde14d076adcfc78c99", size = 424902 } +sdist = { url = "https://files.pythonhosted.org/packages/48/d5/cccc7e82bbda9909ced3e7a441a24205ea07fea4ce23a772743c0c7611fa/protobuf-4.25.6.tar.gz", hash = "sha256:f8cfbae7c5afd0d0eaccbe73267339bff605a2315860bb1ba08eb66670a9a91f", size = 380631 } wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/b2/043a1a1a20edd134563699b0e91862726a0dc9146c090743b6c44d798e75/protobuf-5.29.4-cp310-abi3-win32.whl", hash = "sha256:13eb236f8eb9ec34e63fc8b1d6efd2777d062fa6aaa68268fb67cf77f6839ad7", size = 422709 }, - { url = "https://files.pythonhosted.org/packages/79/fc/2474b59570daa818de6124c0a15741ee3e5d6302e9d6ce0bdfd12e98119f/protobuf-5.29.4-cp310-abi3-win_amd64.whl", hash = "sha256:bcefcdf3976233f8a502d265eb65ea740c989bacc6c30a58290ed0e519eb4b8d", size = 434506 }, - { url = "https://files.pythonhosted.org/packages/46/de/7c126bbb06aa0f8a7b38aaf8bd746c514d70e6a2a3f6dd460b3b7aad7aae/protobuf-5.29.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:307ecba1d852ec237e9ba668e087326a67564ef83e45a0189a772ede9e854dd0", size = 417826 }, - { url = "https://files.pythonhosted.org/packages/a2/b5/bade14ae31ba871a139aa45e7a8183d869efe87c34a4850c87b936963261/protobuf-5.29.4-cp38-abi3-manylinux2014_aarch64.whl", hash = 
"sha256:aec4962f9ea93c431d5714ed1be1c93f13e1a8618e70035ba2b0564d9e633f2e", size = 319574 }, - { url = "https://files.pythonhosted.org/packages/46/88/b01ed2291aae68b708f7d334288ad5fb3e7aa769a9c309c91a0d55cb91b0/protobuf-5.29.4-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:d7d3f7d1d5a66ed4942d4fefb12ac4b14a29028b209d4bfb25c68ae172059922", size = 319672 }, - { url = "https://files.pythonhosted.org/packages/12/fb/a586e0c973c95502e054ac5f81f88394f24ccc7982dac19c515acd9e2c93/protobuf-5.29.4-py3-none-any.whl", hash = "sha256:3fde11b505e1597f71b875ef2fc52062b6a9740e5f7c8997ce878b6009145862", size = 172551 }, + { url = "https://files.pythonhosted.org/packages/42/41/0ff3559d9a0fbdb37c9452f2b84e61f7784d8d7b9850182c7ef493f523ee/protobuf-4.25.6-cp310-abi3-win32.whl", hash = "sha256:61df6b5786e2b49fc0055f636c1e8f0aff263808bb724b95b164685ac1bcc13a", size = 392454 }, + { url = "https://files.pythonhosted.org/packages/79/84/c700d6c3f3be770495b08a1c035e330497a31420e4a39a24c22c02cefc6c/protobuf-4.25.6-cp310-abi3-win_amd64.whl", hash = "sha256:b8f837bfb77513fe0e2f263250f423217a173b6d85135be4d81e96a4653bcd3c", size = 413443 }, + { url = "https://files.pythonhosted.org/packages/b7/03/361e87cc824452376c2abcef0eabd18da78a7439479ec6541cf29076a4dc/protobuf-4.25.6-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:6d4381f2417606d7e01750e2729fe6fbcda3f9883aa0c32b51d23012bded6c91", size = 394246 }, + { url = "https://files.pythonhosted.org/packages/64/d5/7dbeb69b74fa88f297c6d8f11b7c9cef0c2e2fb1fdf155c2ca5775cfa998/protobuf-4.25.6-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:5dd800da412ba7f6f26d2c08868a5023ce624e1fdb28bccca2dc957191e81fb5", size = 293714 }, + { url = "https://files.pythonhosted.org/packages/d4/f0/6d5c100f6b18d973e86646aa5fc09bc12ee88a28684a56fd95511bceee68/protobuf-4.25.6-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:4434ff8bb5576f9e0c78f47c41cdf3a152c0b44de475784cd3fd170aef16205a", size = 294634 }, + { url = 
"https://files.pythonhosted.org/packages/71/eb/be11a1244d0e58ee04c17a1f939b100199063e26ecca8262c04827fe0bf5/protobuf-4.25.6-py3-none-any.whl", hash = "sha256:07972021c8e30b870cfc0863409d033af940213e0e7f64e27fe017b929d2c9f7", size = 156466 }, ] [[package]] @@ -4205,7 +4331,7 @@ wheels = [ [[package]] name = "vllm" -version = "0.8.3" +version = "0.8.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -4230,6 +4356,10 @@ dependencies = [ { name = "numpy" }, { name = "openai" }, { name = "opencv-python-headless" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp" }, + { name = "opentelemetry-sdk" }, + { name = "opentelemetry-semantic-conventions-ai" }, { name = "outlines" }, { name = "partial-json-parser" }, { name = "pillow" }, @@ -4260,9 +4390,9 @@ dependencies = [ { name = "xformers", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "xgrammar", marker = "platform_machine == 'aarch64' or platform_machine == 'x86_64'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/62/ef/238efdf161d527e7872f1792f731fbddcc17ad6362dd43b23dd6c91add1c/vllm-0.8.3.tar.gz", hash = "sha256:475a39d1093b8ef8a905d63eafe0c6c9b8f4f4c2ae2d23f1f3d0fae5e37bb4bd", size = 6618606 } +sdist = { url = "https://files.pythonhosted.org/packages/e6/d6/9d412cdaa92c3ab6250cef51217d37395b2aa372c6c14f90b1668adbbf63/vllm-0.8.4.tar.gz", hash = "sha256:522b13dd16c6c773dec0cb4c42ea591623d03ef94d16db8128ece2600017e6ac", size = 6667631 } wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/99/58ba40e42ec6358ff4da5b6b6ce2ac9f8b10329fcfd65c9ee12c124f37f9/vllm-0.8.3-cp38-abi3-manylinux1_x86_64.whl", hash = "sha256:5488af1cf912ca8a7fad622512e0502235f5377ee36571c04361cbc31105c811", size = 294034759 }, + { url = "https://files.pythonhosted.org/packages/8e/cb/03dc1299e0456ff3d58a11f63682ef29aaf5b1bd7f21bfe0690d7ce6fc40/vllm-0.8.4-cp38-abi3-manylinux1_x86_64.whl", hash = 
"sha256:e346749ee8df48cdcd935d00a7fc123a1e17d9904b064401e74fc6ad73b8104a", size = 294098962 }, ] [[package]] @@ -4515,38 +4645,37 @@ wheels = [ [[package]] name = "xgrammar" -version = "0.1.17" +version = "0.1.18" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nanobind" }, { name = "ninja" }, { name = "pydantic" }, { name = "sentencepiece" }, { name = "tiktoken" }, { name = "torch" }, { name = "transformers" }, - { name = "triton", marker = "platform_machine == 'x86_64' and platform_system == 'linux'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e6/f9/6d530ce703cf5aae65d594a5ab984b9c0c4956e6fdbcc3279e8b1eaa358e/xgrammar-0.1.17.tar.gz", hash = "sha256:8f6cd7b3436482ad8c94b6cc93892a7f36381315c443e8e7f256f8d71c3efdee", size = 1679977 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/13/ca/61c54819ba1b00c5c189d6bd24e4f9b4ab6d334f18b339fd21397b1ccc11/xgrammar-0.1.17-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:829ab14ab2dee067955a3e55639f5f2c2ca4c5a4a6cb60a24b6655bf995f50e4", size = 372103 }, - { url = "https://files.pythonhosted.org/packages/14/18/b34ab691f65389b9939c49ac1188517194c3dadfa3a6ac3f5627226789bc/xgrammar-0.1.17-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cee7985c536d0648e774846ed7e59fd4bea0bcc03b1654d04e723000954308e4", size = 341599 }, - { url = "https://files.pythonhosted.org/packages/53/38/f805fd4eaafd78fac029bd14bf3ac243854c2afccc71c34c6942e6be5439/xgrammar-0.1.17-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c80b26ee041a49a7a0d20c05cf09c05937713c4c2c2d04a24b85ae76ee23d9b", size = 4234957 }, - { url = "https://files.pythonhosted.org/packages/58/20/21b5e35d20b6889a403f610aefb1306798c13de0c8d76c7a8bdff5608000/xgrammar-0.1.17-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ba897085b2d3dc8f9ffadfc66224e8031f05db91c142a7e7a0be984306a7fc1", size = 4308431 }, - { url = 
"https://files.pythonhosted.org/packages/8b/90/004b58a55fdb782f98ed27e591786e78475ead9fb25774dab0a101df5a5a/xgrammar-0.1.17-cp310-cp310-win_amd64.whl", hash = "sha256:d1dc8e880f01ec8f22414542af304446c764c00667aae98e10053d4fc14d1f57", size = 422436 }, - { url = "https://files.pythonhosted.org/packages/53/bd/0abe8e01a3390feb60e9e1799f91b0c2a873c2ff1fa87052c18492b3b71b/xgrammar-0.1.17-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:cfd95f0a8dc7f025921d93fed9c78b3b0dfb28e89b3e9e37c393470ca57352e0", size = 371921 }, - { url = "https://files.pythonhosted.org/packages/96/ee/71fe485df88d111c26e265000f19b4521abf5660278f283ebed671977261/xgrammar-0.1.17-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:98cfd1efe13e446a5d96741202db375a8c807630c95624976889e6831e94c675", size = 341466 }, - { url = "https://files.pythonhosted.org/packages/91/6e/2592870e0a2c061ac7ea5607e82ed5f30daa05dee1896297b4f19e77e9bd/xgrammar-0.1.17-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:873d13f863561ac902f938da63201d81a1f6424365c7f89fb15910a7147b3ec0", size = 4236127 }, - { url = "https://files.pythonhosted.org/packages/f1/05/a31e2f04b0cb510f867da3094b35dc893622debbe1254e02accf6683c7aa/xgrammar-0.1.17-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87940387b4850b4e5e1f68888f9ce1e7236f94dbbf1ba3ebcd08a9a5cab0d66d", size = 4309348 }, - { url = "https://files.pythonhosted.org/packages/c5/3a/1afa276678a9e050323e9ab3013e0ca25df02ff24ced496c8ccec93749bd/xgrammar-0.1.17-cp311-cp311-win_amd64.whl", hash = "sha256:3505efb81a6a2b59b843b99c6c0bc09dc0d924307c18c0de693a919fe10066d6", size = 422201 }, - { url = "https://files.pythonhosted.org/packages/c7/32/deaee8f04d24bc2ed38c14fb01d6faa2319fb361353bbbebac4bdf801ac6/xgrammar-0.1.17-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:cc8e1e4a3298aae9856416e1366ccd07d4c6b5556921ecd108c579b1184522d2", size = 371412 }, - { url = 
"https://files.pythonhosted.org/packages/35/ed/59a89ef003235f746fa989bf82e8425e6b046d65349feacd1b57b4763141/xgrammar-0.1.17-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a7712942793727f0c490f6f2388d5995632cc0c8258a7aff33577ff0f47bc513", size = 340973 }, - { url = "https://files.pythonhosted.org/packages/48/bc/f6f5f16d9cb57684f23a62d3f51deed410da6c9708bf3d5eb679dd867dc0/xgrammar-0.1.17-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b3e998ab30662b5f090978d04928f20467df973116c17624f868fa7717ff683", size = 4236280 }, - { url = "https://files.pythonhosted.org/packages/8a/89/8d4b7a8bf5af80564081555f1734d668e5496e90171280de9153d0696065/xgrammar-0.1.17-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1687ce767c5ca0fe101f699c2691762a037a6b0159608f6c4a720bccdb57ee8a", size = 4310624 }, - { url = "https://files.pythonhosted.org/packages/27/37/8e31a5a44b21e89755795103df04fadb390db395c9fe65179acc9bf067b4/xgrammar-0.1.17-cp312-cp312-win_amd64.whl", hash = "sha256:9572b4c571cf39f6ffd29915b73d3cc13303c72aa86043660f46f66746b5b947", size = 421404 }, - { url = "https://files.pythonhosted.org/packages/62/22/c0eab43801aba25046b3ea74cd3575560086c56a78f4be13033c76735c22/xgrammar-0.1.17-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:668171673af6244108e3ec6317bca592e627be3a57d4c250bd1ce78a23d4d127", size = 340909 }, - { url = "https://files.pythonhosted.org/packages/b5/07/787c48716e9dddbc4beea6c22a5e25f952d6680937788065dec0354b7d74/xgrammar-0.1.17-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dbe59d8b3bc44ec092914cda20728b69a73b2979596f2f0a7b868aaecd234b0", size = 4309322 }, - { url = "https://files.pythonhosted.org/packages/86/2e/5677e586427b9d32715d5ef672429f5e111d7531bc289b96945e95041c3d/xgrammar-0.1.17-cp313-cp313-win_amd64.whl", hash = "sha256:fd2f044eec970db462932fd736330bb76060d41fa6cc23e000f486b53fbdcf34", size = 421329 }, + { name = "triton", marker = "platform_machine == 'x86_64' 
and sys_platform == 'linux'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8f/c3/22c9eeab6ee1dd6d0513d227e9d307fd20a0491db58f1f04bc5d566d13dc/xgrammar-0.1.18.tar.gz", hash = "sha256:a0438a0f9262fff1d0e4f184268eb759f094243edce92b67eb7aa5f245c47471", size = 1697230 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/9a/11a6c75c009d3b21647fa10b5706ad3acec7be9804b3798a4d5e466fd13d/xgrammar-0.1.18-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:61649e9e43edcde62b4bd6ebe2f3c46c89bfff8655283bff0efd72838661619f", size = 416032 }, + { url = "https://files.pythonhosted.org/packages/d4/9d/7ce9cbca36e8b5ccb9cfbe6515ab6b16fd2faa73d06135a49e359601ea65/xgrammar-0.1.18-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:787781a002d55c0d70c3a17736eeb8aaea0fc5adb5897d333a96972d80ae3afb", size = 382849 }, + { url = "https://files.pythonhosted.org/packages/e7/6f/663a041774e1a902f734902893256c672b8688d5e06ef6e6dcc7dffda039/xgrammar-0.1.18-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:669afa9984f67c7b392da39d90fa539e7c829408bc6794333c5108afc39039a0", size = 4730195 }, + { url = "https://files.pythonhosted.org/packages/ff/a1/762cc02193327cce5ccc859b0b445045052663490f5c29f0d81edcb2a156/xgrammar-0.1.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ed09c2df0a3c57e27094a7f63b53178da38ec064d7e683c42519811b987ca48", size = 4823096 }, + { url = "https://files.pythonhosted.org/packages/f3/70/696e41f1c22b8f2d54d2da3771892b18cf65474dc0966a64d1c70a9afeb6/xgrammar-0.1.18-cp310-cp310-win_amd64.whl", hash = "sha256:88cb2747c21bb5c97b5350d4d69eafa248c31610a81bfe316eadee68a83b03b4", size = 459871 }, + { url = "https://files.pythonhosted.org/packages/ae/0d/f9f969b885fb90dc9d66a9c81a6c8a4625c02bcf712a10cdda5afcdafee9/xgrammar-0.1.18-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:90686061cad7ba2af07d7386e406f1432f549e033f2c8752d3846712ee51184a", size = 415920 }, + { url = 
"https://files.pythonhosted.org/packages/d9/2b/6103e4e5e234def44004fc96343ccc16fc980ab527b82d3ac06643f4969e/xgrammar-0.1.18-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9e4d9d55f3b72203cb916f8300c4d66e7d3d01d680565974fd71a5451d1b9296", size = 382680 }, + { url = "https://files.pythonhosted.org/packages/3b/38/1db68bd49c845bfae3659dacf8084837296be548bce6727198cb22e174bd/xgrammar-0.1.18-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbea4280c9faa766c417c450427b4aec9025a4e5df38a46ec21ba7f9e426343", size = 4727368 }, + { url = "https://files.pythonhosted.org/packages/56/73/ba7bd8db631d3bbf224599d32587a2b94c4b4c539c47aa7b0ee2f8764d72/xgrammar-0.1.18-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11512dd0f9000dd879b6f5dd222e1105ffc641b8b83d5949ef6550e41e2d84ce", size = 4824156 }, + { url = "https://files.pythonhosted.org/packages/ea/97/383f1caeb52feac996ae30d04885080dc9843aa771f3ec494d06c950b7d9/xgrammar-0.1.18-cp311-cp311-win_amd64.whl", hash = "sha256:cf46bca542dea882dbaa6029a2420a8fbf6a721871007f6c43af4b4be1bbbe84", size = 459490 }, + { url = "https://files.pythonhosted.org/packages/a7/c3/376dca626625f2ae13689cb51708b71e0507f1e048cf475b22580034b3a8/xgrammar-0.1.18-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:cce11c2c497dc58d9f720f943d09e6f9d30fd8f454a8886541d4e03130c9d275", size = 415376 }, + { url = "https://files.pythonhosted.org/packages/97/05/d9e5081f40cc0fb3b450a293eb8a3d53ff61eded4edd371094cf520189b7/xgrammar-0.1.18-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:56070583288729b71b9bc3c156ec62ea9a4da1a5f06419bba7ab09e4b3b65102", size = 381451 }, + { url = "https://files.pythonhosted.org/packages/0d/fc/f2adecd8293947a17555827d71836002265e43d20999db028ce9aad93c95/xgrammar-0.1.18-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acd7ef426f22e910f247a6ab772eb6121c06e2d9d59c3a6d6adbc117c00717cd", size = 4728909 }, + { url = 
"https://files.pythonhosted.org/packages/8f/c3/54acf006969aae4b0f3760998f0a9695fa4cadb5044e783ee9af40a1d2cc/xgrammar-0.1.18-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ac7ef1f74af7bedc6cf992b4f9f5ea6f5a736ce17a3abb229108a3538e92000", size = 4825327 }, + { url = "https://files.pythonhosted.org/packages/cb/16/a9dd9cce4ede5ee1d71c30d3d6960abd730f4322d6aec025f9f1bd102812/xgrammar-0.1.18-cp312-cp312-win_amd64.whl", hash = "sha256:c16ceebd093eae90437703ec7bbb635a76371dd66adae526143154bfb948e835", size = 458936 }, + { url = "https://files.pythonhosted.org/packages/a0/8a/2bf99321c2eccc456d2d11d098b58d1fa3214bd81152eae3745bfce9675d/xgrammar-0.1.18-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2abb7f326a28c8d19cb072d7989e3e473e37f0c151157154b216a53dd4324b41", size = 381471 }, + { url = "https://files.pythonhosted.org/packages/d1/cf/d59bd0a13583a9827a74ea5ec067b05a0be016b198458f6f57ae2e2eb092/xgrammar-0.1.18-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c6a48a09f875e5a10c3872cb291c46b73ecd5278fccf9695514384a9e59a3fe", size = 4824347 }, + { url = "https://files.pythonhosted.org/packages/21/28/7e434b349fc81f9a7e5938fe8a84bb3fb44e28304ee58ba68362f3936e90/xgrammar-0.1.18-cp313-cp313-win_amd64.whl", hash = "sha256:7da855fd8188aafdd4f7228726dc1e0c6069b7a932205b13df737201b93c8029", size = 458872 }, ] [[package]] From bf84d44b3ae2f772f88401135777830b84206ee2 Mon Sep 17 00:00:00 2001 From: Yuki Huang Date: Mon, 21 Apr 2025 09:07:32 +0000 Subject: [PATCH 08/13] group streaming Signed-off-by: Yuki Huang group refit tensor by size instead of count Signed-off-by: Yuki Huang update get_weights_ipc_handles Signed-off-by: Yuki Huang update fsdp1 and debug log Signed-off-by: Yuki Huang --- nemo_reinforcer/algorithms/grpo.py | 34 +++++++++++- .../models/generation/vllm_backend.py | 20 ++++--- .../models/policy/dtensor_policy_worker.py | 53 +++++++++++------- .../models/policy/fsdp1_policy_worker.py | 55 ++++++++++++------- 
nemo_reinforcer/models/policy/hf_policy.py | 2 +- 5 files changed, 109 insertions(+), 55 deletions(-) diff --git a/nemo_reinforcer/algorithms/grpo.py b/nemo_reinforcer/algorithms/grpo.py index 53f8d249aa..e67edd7f19 100644 --- a/nemo_reinforcer/algorithms/grpo.py +++ b/nemo_reinforcer/algorithms/grpo.py @@ -271,20 +271,48 @@ def setup( # =============================================================================== +import time def refit_policy_generation( policy: PolicyInterface, policy_generation: GenerationInterface, + refit_buffer_size: int = 10, # GB ): """Refit the policy generation interface with the latest policy weights.""" + s = time.time() policy.offload_before_refit() + print(f"[offload_before_refit] {time.time() - s}s") + s = time.time() policy_generation.prepare_for_generation(tags=["weights"]) + print(f"[prepare_for_generation - weights] {time.time() - s}s") + s = time.time() # Streaming update weights to save memory - param_keys = policy.prepare_weights_for_ipc() - for key in param_keys: - ipc_handles = policy.get_weights_ipc_handles(key) + state_dict_info = policy.prepare_weights_for_ipc() + # group keys to save time + available_bytes = refit_buffer_size * (1024 ** 3) + split_keys, keys = [], [] + for key, size_in_bytes in state_dict_info: + keys.append(key) + available_bytes -= size_in_bytes + if available_bytes <= 0: + split_keys.append(keys) + keys = [] + available_bytes = refit_buffer_size * (1024 ** 3) + if len(keys) > 0: + split_keys.append(keys) + print(f"[prepare_weights_for_ipc] {time.time() - s}s") + s = time.time() + # do update + for keys in split_keys: + ipc_handles = policy.get_weights_ipc_handles(keys) policy_generation.update_weights(ipc_handles) + print(f"[update_weights] {time.time() - s}s") + s = time.time() policy.offload_after_refit() + print(f"[offload_after_refit] {time.time() - s}s") + s = time.time() policy_generation.prepare_for_generation(tags=["kv_cache"]) + print(f"[prepare_for_generation - kv_cache] {time.time() - 
s}s") + s = time.time() def generate_responses( diff --git a/nemo_reinforcer/models/generation/vllm_backend.py b/nemo_reinforcer/models/generation/vllm_backend.py index 662fa7d21c..8792a620b5 100644 --- a/nemo_reinforcer/models/generation/vllm_backend.py +++ b/nemo_reinforcer/models/generation/vllm_backend.py @@ -40,19 +40,21 @@ def update_weights_from_ipc_handles(self, ipc_handles): try: # Get handles for this device device_uuid = self.report_device_id() - named_handle = ipc_handles[device_uuid] + named_handles = ipc_handles[device_uuid] device_id = self.device.index - # Process each handle to get the tensor - name, handle = named_handle - func, args = handle - list_args = list(args) - # Update device ID to match the current device - list_args[6] = device_id - tensor = func(*list_args) + weights = [] + for name, handle in named_handles: + # Process each handle to get the tensor + func, args = handle + list_args = list(args) + # Update device ID to match the current device + list_args[6] = device_id + tensor = func(*list_args) + weights.append((name, tensor)) # Load weights into the model - self.model_runner.model.load_weights(weights=[(name, tensor)]) + self.model_runner.model.load_weights(weights=weights) torch.cuda.synchronize() return True except Exception as e: diff --git a/nemo_reinforcer/models/policy/dtensor_policy_worker.py b/nemo_reinforcer/models/policy/dtensor_policy_worker.py index a0bc159ad0..2822f80bb6 100644 --- a/nemo_reinforcer/models/policy/dtensor_policy_worker.py +++ b/nemo_reinforcer/models/policy/dtensor_policy_worker.py @@ -176,7 +176,7 @@ def __init__( # used for streaming update inference engine weights self._held_sharded_state_dict_reference = None - self._held_single_streamed_param_reference = None + self._held_streamed_param_reference = None if init_reference_model: self.reference_model_state_dict = get_cpu_state_dict( @@ -541,31 +541,42 @@ def report_device_id(self) -> str: def prepare_weights_for_ipc(self): self.model = 
self.move_to_cuda(self.model) self._held_sharded_state_dict_reference = self.model.state_dict() - return self._held_sharded_state_dict_reference.keys() + # Collect info for streaming multiple tensors + state_dict_info = [] + for name, tensor in self._held_sharded_state_dict_reference.items(): + # dtensor's numel will return complete tensor instead of only local tensor + size_in_bytes = tensor.element_size() * tensor.numel() + state_dict_info.append((name, size_in_bytes)) + return state_dict_info @torch.no_grad() - def get_weights_ipc_handles(self, key): + def get_weights_ipc_handles(self, keys): from torch.multiprocessing.reductions import reduce_tensor - # Get device UUID for IPC - device_uuid = self.report_device_id() - - # Get full_tensor for dtensor (GPU > 1) - tensor = self._held_sharded_state_dict_reference[key] - if isinstance(tensor, DTensor): - full_tensor = tensor.full_tensor() - else: - full_tensor = tensor + converted_params = {} + for key in keys: + # Get full_tensor for dtensor (GPU > 1) + tensor = self._held_sharded_state_dict_reference[key] + if isinstance(tensor, DTensor): + full_tensor = tensor.full_tensor() + else: + full_tensor = tensor + # Convert parameters to the configured dtype + converted_params[key] = full_tensor.to(self.dtype, non_blocking=True) - # Convert parameters to the configured dtype - full_tensor = full_tensor.to(self.dtype, non_blocking=True) # Temporary record the full tensor for cleanup # It is needed for cleanup the last full_tensor in the refit process - self._held_single_streamed_param_reference = full_tensor + self._held_streamed_param_reference = converted_params + + # Get device UUID for IPC + device_uuid = self.report_device_id() + # Create handles for the tensors + all_handles = [] + for key, p in converted_params.items(): + handle = reduce_tensor(p.detach()) + all_handles.append((key, handle)) - # Create a handle for the tensor - handle = reduce_tensor(full_tensor.detach()) - return {device_uuid: (key, handle)} + 
return {device_uuid: all_handles} def prepare_for_lp_inference(self): if not self.cpu_offload: @@ -627,9 +638,9 @@ def offload_after_refit(self): if self._held_sharded_state_dict_reference is not None: del self._held_sharded_state_dict_reference self._held_sharded_state_dict_reference = None - if self._held_single_streamed_param_reference is not None: - del self._held_single_streamed_param_reference - self._held_single_streamed_param_reference = None + if self._held_streamed_param_reference is not None: + del self._held_streamed_param_reference + self._held_streamed_param_reference = None gc.collect() torch.cuda.empty_cache() diff --git a/nemo_reinforcer/models/policy/fsdp1_policy_worker.py b/nemo_reinforcer/models/policy/fsdp1_policy_worker.py index 1aa49c0787..aed71d7dbd 100644 --- a/nemo_reinforcer/models/policy/fsdp1_policy_worker.py +++ b/nemo_reinforcer/models/policy/fsdp1_policy_worker.py @@ -152,7 +152,7 @@ def do_fsdp(model): # used for streaming update inference engine weights self._held_sharded_state_dict_reference = None - self._held_single_streamed_param_reference = None + self._held_streamed_param_reference = None # register_fsdp_forward_method(self.model, "generate") if init_optimizer: @@ -712,32 +712,45 @@ def prepare_weights_for_ipc(self): state_dict_config=ShardedStateDictConfig(), ): self._held_sharded_state_dict_reference = self.model.state_dict() - return self._held_sharded_state_dict_reference.keys() + + # Collect info for streaming multiple tensors + state_dict_info = [] + for name, tensor in self._held_sharded_state_dict_reference.items(): + # dtensor's numel will return complete tensor instead of only local tensor + size_in_bytes = tensor.element_size() * tensor.numel() + state_dict_info.append((name, size_in_bytes)) + + return state_dict_info @torch.no_grad() - def get_weights_ipc_handles(self, key): + def get_weights_ipc_handles(self, keys): from torch.distributed.tensor import DTensor from torch.multiprocessing.reductions import 
reduce_tensor - # Get device UUID for IPC - device_uuid = self.report_device_id() - - # Get full_tensor for dtensor (GPU > 1) - tensor = self._held_sharded_state_dict_reference[key] - if isinstance(tensor, DTensor): - full_tensor = tensor.full_tensor() - else: - full_tensor = tensor + converted_params = {} + for key in keys: + # Get full_tensor for dtensor (GPU > 1) + tensor = self._held_sharded_state_dict_reference[key] + if isinstance(tensor, DTensor): + full_tensor = tensor.full_tensor() + else: + full_tensor = tensor + # Convert parameters to the configured dtype + converted_params[key] = full_tensor.to(self.dtype, non_blocking=True) - # Convert parameters to the configured dtype - full_tensor = full_tensor.to(self.dtype, non_blocking=True) # Temporary record the full tensor for cleanup # It is needed for cleanup the last full_tensor in the refit process - self._held_single_streamed_param_reference = full_tensor + self._held_streamed_param_reference = converted_params + + # Get device UUID for IPC + device_uuid = self.report_device_id() + # Create handles for the tensors + all_handles = [] + for key, p in converted_params.items(): + handle = reduce_tensor(p.detach()) + all_handles.append((key, handle)) - # Create a handle for the tensor - handle = reduce_tensor(full_tensor.detach()) - return {device_uuid: (key, handle)} + return {device_uuid: all_handles} def prepare_for_lp_inference(self): self.model = self.manual_load_to_gpu(self.model) @@ -792,9 +805,9 @@ def offload_after_refit(self): if self._held_sharded_state_dict_reference is not None: del self._held_sharded_state_dict_reference self._held_sharded_state_dict_reference = None - if self._held_single_streamed_param_reference is not None: - del self._held_single_streamed_param_reference - self._held_single_streamed_param_reference = None + if self._held_streamed_param_reference is not None: + del self._held_streamed_param_reference + self._held_streamed_param_reference = None gc.collect() 
torch.cuda.empty_cache() diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py index a0c6ef2945..a82a14656f 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_reinforcer/models/policy/hf_policy.py @@ -254,7 +254,7 @@ def prepare_weights_for_ipc(self): """Prepare the weights for IPC. Returns: - dict: A dictionary containing the keys of the parameters. + dict: A dictionary containing the state_dict_info of the model. """ futures = self.worker_group.run_all_workers_single_data( "prepare_weights_for_ipc", only_on="all_tied_workers" From ef530fb856caa6f7a335ee980526e868bf1f5a1a Mon Sep 17 00:00:00 2001 From: Yuki Huang Date: Mon, 21 Apr 2025 11:47:00 +0000 Subject: [PATCH 09/13] add refit_buffer_size to config Signed-off-by: Yuki Huang --- examples/configs/grpo_math_1B.yaml | 1 + examples/configs/grpo_math_8B.yaml | 1 + nemo_reinforcer/algorithms/grpo.py | 9 +++++---- nemo_reinforcer/models/policy/__init__.py | 1 + tests/unit/models/generation/test_vllm_generation.py | 9 +++++---- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml index 4cf474df01..4d8134d82d 100644 --- a/examples/configs/grpo_math_1B.yaml +++ b/examples/configs/grpo_math_1B.yaml @@ -35,6 +35,7 @@ policy: precision: "bfloat16" fsdp_offload_enabled: false activation_checkpointing_enabled: false + refit_buffer_size: 4 # used for refitting inference engine, the unit is GB dtensor_cfg: enabled: false diff --git a/examples/configs/grpo_math_8B.yaml b/examples/configs/grpo_math_8B.yaml index e791c66a34..69f6cab927 100644 --- a/examples/configs/grpo_math_8B.yaml +++ b/examples/configs/grpo_math_8B.yaml @@ -17,6 +17,7 @@ policy: precision: "bfloat16" fsdp_offload_enabled: false activation_checkpointing_enabled: false + refit_buffer_size: 4 # used for refitting inference engine, the unit is GB optimizer: name: "torch.optim.AdamW" diff --git 
a/nemo_reinforcer/algorithms/grpo.py b/nemo_reinforcer/algorithms/grpo.py index e67edd7f19..686672f8db 100644 --- a/nemo_reinforcer/algorithms/grpo.py +++ b/nemo_reinforcer/algorithms/grpo.py @@ -275,7 +275,7 @@ def setup( def refit_policy_generation( policy: PolicyInterface, policy_generation: GenerationInterface, - refit_buffer_size: int = 10, # GB + refit_buffer_size: int, # GB ): """Refit the policy generation interface with the latest policy weights.""" s = time.time() @@ -462,12 +462,13 @@ def grpo_train( consumed_samples = grpo_save_state["consumed_samples"] val_period = master_config["grpo"]["val_period"] val_at_start = master_config["grpo"]["val_at_start"] + refit_buffer_size = master_config["policy"]["refit_buffer_size"] # Run validation at the start if configured if val_at_start and step == 0: print("\nšŸ” Running initial validation...") if NEED_REFIT and POLICY_GENERATION_STALE: - refit_policy_generation(policy, policy_generation) + refit_policy_generation(policy, policy_generation, refit_buffer_size) POLICY_GENERATION_STALE = False else: policy_generation.prepare_for_generation() @@ -516,7 +517,7 @@ def grpo_train( print(f"ā–¶ Generating responses for batch of size {len(input_ids)}...") with timer.time("prepare_for_generation"): if NEED_REFIT and POLICY_GENERATION_STALE: - refit_policy_generation(policy, policy_generation) + refit_policy_generation(policy, policy_generation, refit_buffer_size) POLICY_GENERATION_STALE = False else: policy_generation.prepare_for_generation() @@ -620,7 +621,7 @@ def grpo_train( # Run validation if it's a validation step if val_period > 0 and (step + 1) % val_period == 0: if NEED_REFIT and POLICY_GENERATION_STALE: - refit_policy_generation(policy, policy_generation) + refit_policy_generation(policy, policy_generation, refit_buffer_size) POLICY_GENERATION_STALE = False else: policy_generation.prepare_for_generation() diff --git a/nemo_reinforcer/models/policy/__init__.py b/nemo_reinforcer/models/policy/__init__.py index 
795e08f895..6200f30438 100644 --- a/nemo_reinforcer/models/policy/__init__.py +++ b/nemo_reinforcer/models/policy/__init__.py @@ -44,3 +44,4 @@ class PolicyConfig(TypedDict): max_grad_norm: Optional[Union[float, int]] fsdp_offload_enabled: bool activation_checkpointing_enabled: bool + refit_buffer_size: int diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 72ea3f9127..c10dbcb03b 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -66,6 +66,7 @@ def get_basic_hf_test_config(enable_dtensor: bool = False) -> PolicyConfig: "precision": "float32", "fsdp_offload_enabled": False, "activation_checkpointing_enabled": False, + "refit_buffer_size": 4, "optimizer": { "name": "torch.optim.AdamW", "kwargs": { @@ -271,7 +272,7 @@ def test_vllm_worker_seed_behavior(cluster, tokenizer): hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") - refit_policy_generation(hf_policy, policy) + refit_policy_generation(hf_policy, policy, hf_config["refit_buffer_size"]) try: # Generate with duplicated prompts @@ -434,7 +435,7 @@ def test_vllm_generation_with_hf_training(cluster, tokenizer, enable_dtensor): hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") - refit_policy_generation(hf_policy, vllm_policy) + refit_policy_generation(hf_policy, vllm_policy, hf_config["refit_buffer_size"]) # Step 1: Use vLLM for generation print("Using vLLM policy for fast generation...") @@ -780,7 +781,7 @@ def test_vllm_weight_update_memory(cluster, tokenizer, enable_dtensor): # reset peak memory stats before refit workers = hf_policy.worker_group.workers ray.get([w.reset_peak_memory_stats.remote() for w in workers]) - refit_policy_generation(hf_policy, vllm_policy) + refit_policy_generation(hf_policy, vllm_policy, refit_buffer_size=1) gpu_infos = ray.get([w.get_gpu_info.remote() for w in 
workers]) # Gather memory stats @@ -847,7 +848,7 @@ def test_vllm_generation_with_stop( hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") - refit_policy_generation(hf_policy, vllm_generation) + refit_policy_generation(hf_policy, vllm_generation, hf_config["refit_buffer_size"]) # test generate outputs = vllm_generation.generate(test_input_data, greedy=True) From 590eb4dd544be10de4d02eb98ac1c58ab0e2c30b Mon Sep 17 00:00:00 2001 From: Yuki Huang Date: Tue, 22 Apr 2025 03:32:00 +0000 Subject: [PATCH 10/13] remove debug code Signed-off-by: Yuki Huang --- nemo_reinforcer/algorithms/grpo.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/nemo_reinforcer/algorithms/grpo.py b/nemo_reinforcer/algorithms/grpo.py index 686672f8db..1393e0f8af 100644 --- a/nemo_reinforcer/algorithms/grpo.py +++ b/nemo_reinforcer/algorithms/grpo.py @@ -271,20 +271,14 @@ def setup( # =============================================================================== -import time def refit_policy_generation( policy: PolicyInterface, policy_generation: GenerationInterface, refit_buffer_size: int, # GB ): """Refit the policy generation interface with the latest policy weights.""" - s = time.time() policy.offload_before_refit() - print(f"[offload_before_refit] {time.time() - s}s") - s = time.time() policy_generation.prepare_for_generation(tags=["weights"]) - print(f"[prepare_for_generation - weights] {time.time() - s}s") - s = time.time() # Streaming update weights to save memory state_dict_info = policy.prepare_weights_for_ipc() # group keys to save time @@ -299,20 +293,12 @@ def refit_policy_generation( available_bytes = refit_buffer_size * (1024 ** 3) if len(keys) > 0: split_keys.append(keys) - print(f"[prepare_weights_for_ipc] {time.time() - s}s") - s = time.time() # do update for keys in split_keys: ipc_handles = policy.get_weights_ipc_handles(keys) policy_generation.update_weights(ipc_handles) - print(f"[update_weights] {time.time() - s}s") - s = 
time.time() policy.offload_after_refit() - print(f"[offload_after_refit] {time.time() - s}s") - s = time.time() policy_generation.prepare_for_generation(tags=["kv_cache"]) - print(f"[prepare_for_generation - kv_cache] {time.time() - s}s") - s = time.time() def generate_responses( From 9de9a486561aabb575dfecf3ac83e3c44dd31ec0 Mon Sep 17 00:00:00 2001 From: Yuki Huang Date: Tue, 22 Apr 2025 03:49:41 +0000 Subject: [PATCH 11/13] fix code format Signed-off-by: Yuki Huang --- nemo_reinforcer/algorithms/grpo.py | 16 ++++++++++++---- .../models/generation/vllm_backend.py | 8 ++++---- .../models/generation/test_vllm_generation.py | 6 +++++- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/nemo_reinforcer/algorithms/grpo.py b/nemo_reinforcer/algorithms/grpo.py index 1393e0f8af..e5802a9764 100644 --- a/nemo_reinforcer/algorithms/grpo.py +++ b/nemo_reinforcer/algorithms/grpo.py @@ -282,7 +282,7 @@ def refit_policy_generation( # Streaming update weights to save memory state_dict_info = policy.prepare_weights_for_ipc() # group keys to save time - available_bytes = refit_buffer_size * (1024 ** 3) + available_bytes = refit_buffer_size * (1024**3) split_keys, keys = [], [] for key, size_in_bytes in state_dict_info: keys.append(key) @@ -290,7 +290,7 @@ def refit_policy_generation( if available_bytes <= 0: split_keys.append(keys) keys = [] - available_bytes = refit_buffer_size * (1024 ** 3) + available_bytes = refit_buffer_size * (1024**3) if len(keys) > 0: split_keys.append(keys) # do update @@ -503,7 +503,11 @@ def grpo_train( print(f"ā–¶ Generating responses for batch of size {len(input_ids)}...") with timer.time("prepare_for_generation"): if NEED_REFIT and POLICY_GENERATION_STALE: - refit_policy_generation(policy, policy_generation, refit_buffer_size) + refit_policy_generation( + policy, + policy_generation, + refit_buffer_size, + ) POLICY_GENERATION_STALE = False else: policy_generation.prepare_for_generation() @@ -607,7 +611,11 @@ def grpo_train( # Run validation 
if it's a validation step if val_period > 0 and (step + 1) % val_period == 0: if NEED_REFIT and POLICY_GENERATION_STALE: - refit_policy_generation(policy, policy_generation, refit_buffer_size) + refit_policy_generation( + policy, + policy_generation, + refit_buffer_size, + ) POLICY_GENERATION_STALE = False else: policy_generation.prepare_for_generation() diff --git a/nemo_reinforcer/models/generation/vllm_backend.py b/nemo_reinforcer/models/generation/vllm_backend.py index 8792a620b5..28cf9fbd2f 100644 --- a/nemo_reinforcer/models/generation/vllm_backend.py +++ b/nemo_reinforcer/models/generation/vllm_backend.py @@ -40,12 +40,12 @@ def update_weights_from_ipc_handles(self, ipc_handles): try: # Get handles for this device device_uuid = self.report_device_id() - named_handles = ipc_handles[device_uuid] + handles = ipc_handles[device_uuid] device_id = self.device.index - weights = [] - for name, handle in named_handles: - # Process each handle to get the tensor + + # Process each handle to get the tensor + for name, handle in handles: func, args = handle list_args = list(args) # Update device ID to match the current device diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index c10dbcb03b..1e17769a5c 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -848,7 +848,11 @@ def test_vllm_generation_with_stop( hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") - refit_policy_generation(hf_policy, vllm_generation, hf_config["refit_buffer_size"]) + refit_policy_generation( + hf_policy, + vllm_generation, + hf_config["refit_buffer_size"], + ) # test generate outputs = vllm_generation.generate(test_input_data, greedy=True) From 019bb47698f82b4ebc56c5ab7a8becc4bdc37edf Mon Sep 17 00:00:00 2001 From: Yuki Huang Date: Tue, 22 Apr 2025 05:32:28 +0000 Subject: [PATCH 12/13] fix unit test Signed-off-by: 
Yuki Huang --- tests/unit/models/generation/test_vllm_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 1e17769a5c..04958756df 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -708,8 +708,8 @@ def test_vllm_weight_update_and_prefix_cache_reset( print("Updating vLLM weights from HF policy...") param_keys = hf_policy.prepare_weights_for_ipc() - for key in param_keys: - ipc_handles = hf_policy.get_weights_ipc_handles(key) + for key, _ in param_keys: + ipc_handles = hf_policy.get_weights_ipc_handles([key]) update_success = vllm_policy.update_weights(ipc_handles) assert update_success, "Weight update should succeed" print("vLLM weights successfully updated.") From b6f6cba60fdd507f21de398ccc54de3a8915edc2 Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Tue, 22 Apr 2025 15:24:11 -0700 Subject: [PATCH 13/13] Rename refit_buffer_size to refit_buffer_size_gb; fix the logic of grouping keys to not include the key that exceeds the size limit Signed-off-by: Parth Chadha --- examples/configs/grpo_math_1B.yaml | 2 +- examples/configs/grpo_math_8B.yaml | 2 +- nemo_reinforcer/algorithms/grpo.py | 23 +++++++++++-------- nemo_reinforcer/models/generation/vllm.py | 2 ++ nemo_reinforcer/models/policy/__init__.py | 2 +- .../models/generation/test_vllm_generation.py | 12 ++++++---- 6 files changed, 25 insertions(+), 18 deletions(-) diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml index 918fb709fd..fe32b9691f 100644 --- a/examples/configs/grpo_math_1B.yaml +++ b/examples/configs/grpo_math_1B.yaml @@ -39,7 +39,7 @@ policy: precision: "bfloat16" fsdp_offload_enabled: false activation_checkpointing_enabled: false - refit_buffer_size: 4 # used for refitting inference engine, the unit is GB + refit_buffer_size_gb: 4 # used for 
refitting inference engine, the unit is GB dtensor_cfg: enabled: false diff --git a/examples/configs/grpo_math_8B.yaml b/examples/configs/grpo_math_8B.yaml index 69f6cab927..dcd68a9509 100644 --- a/examples/configs/grpo_math_8B.yaml +++ b/examples/configs/grpo_math_8B.yaml @@ -17,7 +17,7 @@ policy: precision: "bfloat16" fsdp_offload_enabled: false activation_checkpointing_enabled: false - refit_buffer_size: 4 # used for refitting inference engine, the unit is GB + refit_buffer_size_gb: 4 # used for refitting inference engine, the unit is GB optimizer: name: "torch.optim.AdamW" diff --git a/nemo_reinforcer/algorithms/grpo.py b/nemo_reinforcer/algorithms/grpo.py index ce3b36ea83..592a4bc4d8 100644 --- a/nemo_reinforcer/algorithms/grpo.py +++ b/nemo_reinforcer/algorithms/grpo.py @@ -279,7 +279,7 @@ def setup( def refit_policy_generation( policy: PolicyInterface, policy_generation: GenerationInterface, - refit_buffer_size: int, # GB + refit_buffer_size_gb: int, # GB ): """Refit the policy generation interface with the latest policy weights.""" policy.offload_before_refit() @@ -287,15 +287,18 @@ def refit_policy_generation( # Streaming update weights to save memory state_dict_info = policy.prepare_weights_for_ipc() # group keys to save time - available_bytes = refit_buffer_size * (1024**3) + available_bytes = refit_buffer_size_gb * (1024**3) split_keys, keys = [], [] for key, size_in_bytes in state_dict_info: + if size_in_bytes > available_bytes: + if keys: + split_keys.append(keys) + keys = [] + available_bytes = refit_buffer_size_gb * (1024**3) + keys.append(key) available_bytes -= size_in_bytes - if available_bytes <= 0: - split_keys.append(keys) - keys = [] - available_bytes = refit_buffer_size * (1024**3) + if len(keys) > 0: split_keys.append(keys) # do update @@ -339,13 +342,13 @@ def grpo_train( consumed_samples = grpo_save_state["consumed_samples"] val_period = master_config["grpo"]["val_period"] val_at_start = master_config["grpo"]["val_at_start"] - 
refit_buffer_size = master_config["policy"]["refit_buffer_size"] + refit_buffer_size_gb = master_config["policy"]["refit_buffer_size_gb"] # Run validation at the start if configured if val_at_start and step == 0: print("\n🔍 Running initial validation...") if NEED_REFIT and POLICY_GENERATION_STALE: - refit_policy_generation(policy, policy_generation, refit_buffer_size) + refit_policy_generation(policy, policy_generation, refit_buffer_size_gb) POLICY_GENERATION_STALE = False else: policy_generation.prepare_for_generation() @@ -390,7 +393,7 @@ def grpo_train( refit_policy_generation( policy, policy_generation, - refit_buffer_size, + refit_buffer_size_gb, ) POLICY_GENERATION_STALE = False else: @@ -502,7 +505,7 @@ def grpo_train( refit_policy_generation( policy, policy_generation, - refit_buffer_size, + refit_buffer_size_gb, ) POLICY_GENERATION_STALE = False else: diff --git a/nemo_reinforcer/models/generation/vllm.py b/nemo_reinforcer/models/generation/vllm.py index 7a9323ca56..1f72eb3df0 100644 --- a/nemo_reinforcer/models/generation/vllm.py +++ b/nemo_reinforcer/models/generation/vllm.py @@ -456,6 +456,8 @@ def sleep(self): def wake_up(self, **kwargs): # tags like ["weights", "kv_cache"] + # We can call this function with just tags=["weights"] while doing refit to + # avoid spiking memory with the kv_cache while the training fwk is awake.
if "tags" in kwargs: self.llm.wake_up(tags=kwargs["tags"]) else: diff --git a/nemo_reinforcer/models/policy/__init__.py b/nemo_reinforcer/models/policy/__init__.py index 6200f30438..c83a8d0bf9 100644 --- a/nemo_reinforcer/models/policy/__init__.py +++ b/nemo_reinforcer/models/policy/__init__.py @@ -44,4 +44,4 @@ class PolicyConfig(TypedDict): max_grad_norm: Optional[Union[float, int]] fsdp_offload_enabled: bool activation_checkpointing_enabled: bool - refit_buffer_size: int + refit_buffer_size_gb: int diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 221b529235..2232ea6499 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -66,7 +66,7 @@ def get_basic_hf_test_config(enable_dtensor: bool = False) -> PolicyConfig: "precision": "float32", "fsdp_offload_enabled": False, "activation_checkpointing_enabled": False, - "refit_buffer_size": 4, + "refit_buffer_size_gb": 4, "optimizer": { "name": "torch.optim.AdamW", "kwargs": { @@ -272,7 +272,7 @@ def test_vllm_worker_seed_behavior(cluster, tokenizer): hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") - refit_policy_generation(hf_policy, policy, hf_config["refit_buffer_size"]) + refit_policy_generation(hf_policy, policy, hf_config["refit_buffer_size_gb"]) try: # Generate with duplicated prompts @@ -435,7 +435,9 @@ def test_vllm_generation_with_hf_training(cluster, tokenizer, enable_dtensor): hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") - refit_policy_generation(hf_policy, vllm_policy, hf_config["refit_buffer_size"]) + refit_policy_generation( + hf_policy, vllm_policy, hf_config["refit_buffer_size_gb"] + ) # Step 1: Use vLLM for generation print("Using vLLM policy for fast generation...") @@ -781,7 +783,7 @@ def test_vllm_weight_update_memory(cluster, tokenizer, enable_dtensor): # reset peak memory 
stats before refit workers = hf_policy.worker_group.workers ray.get([w.reset_peak_memory_stats.remote() for w in workers]) - refit_policy_generation(hf_policy, vllm_policy, refit_buffer_size=1) + refit_policy_generation(hf_policy, vllm_policy, refit_buffer_size_gb=1) gpu_infos = ray.get([w.get_gpu_info.remote() for w in workers]) # Gather memory stats @@ -851,7 +853,7 @@ def test_vllm_generation_with_stop( refit_policy_generation( hf_policy, vllm_generation, - hf_config["refit_buffer_size"], + hf_config["refit_buffer_size_gb"], ) # test generate