From 29378b0f79f479043002af0eb0dc9dff207cb870 Mon Sep 17 00:00:00 2001 From: Xin He Date: Mon, 23 Mar 2026 11:25:46 +0800 Subject: [PATCH 1/4] fix contiguous issue Signed-off-by: Xin He --- auto_round/compressors/shard_writer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/auto_round/compressors/shard_writer.py b/auto_round/compressors/shard_writer.py index 130a4b42b..be1c5a043 100644 --- a/auto_round/compressors/shard_writer.py +++ b/auto_round/compressors/shard_writer.py @@ -142,7 +142,9 @@ def _flush_shard(self): if self.use_safetensors: from safetensors.torch import save_file - save_file(self.current_shard_tensors, tmp_path) + contiguous_tensors = {k: v.contiguous() if not v.is_contiguous() else v + for k, v in self.current_shard_tensors.items()} + save_file(contiguous_tensors, tmp_path) else: torch.save(self.current_shard_tensors, tmp_path) From 455a7564b91f0d43b41b9b3f5e398fe3cfbf0f9d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 23 Mar 2026 03:26:35 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- auto_round/compressors/shard_writer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/auto_round/compressors/shard_writer.py b/auto_round/compressors/shard_writer.py index be1c5a043..176ff7764 100644 --- a/auto_round/compressors/shard_writer.py +++ b/auto_round/compressors/shard_writer.py @@ -142,8 +142,9 @@ def _flush_shard(self): if self.use_safetensors: from safetensors.torch import save_file - contiguous_tensors = {k: v.contiguous() if not v.is_contiguous() else v - for k, v in self.current_shard_tensors.items()} + contiguous_tensors = { + k: v.contiguous() if not v.is_contiguous() else v for k, v in self.current_shard_tensors.items() + } save_file(contiguous_tensors, tmp_path) else: torch.save(self.current_shard_tensors, tmp_path) From a4d61ff78a1ed5b52d3ecac68bd43054a10110a9 Mon Sep 17 00:00:00 2001 From: Xin He Date: Mon, 23 Mar 2026 11:30:06 +0800 Subject: [PATCH 3/4] Update auto_round/compressors/shard_writer.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- auto_round/compressors/shard_writer.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/auto_round/compressors/shard_writer.py b/auto_round/compressors/shard_writer.py index 176ff7764..3e0f63c47 100644 --- a/auto_round/compressors/shard_writer.py +++ b/auto_round/compressors/shard_writer.py @@ -142,10 +142,12 @@ def _flush_shard(self): if self.use_safetensors: from safetensors.torch import save_file - contiguous_tensors = { - k: v.contiguous() if not v.is_contiguous() else v for k, v in self.current_shard_tensors.items() - } - save_file(contiguous_tensors, tmp_path) + # Ensure tensors are contiguous in-place to avoid duplicating them in a separate dict, + # which can increase peak RAM usage during saving. + for k, v in list(self.current_shard_tensors.items()): + if isinstance(v, torch.Tensor) and not v.is_contiguous(): + self.current_shard_tensors[k] = v.contiguous() + save_file(self.current_shard_tensors, tmp_path) else: torch.save(self.current_shard_tensors, tmp_path) From cc18a55383a41fb746cc7e9f1341291ff24f8679 Mon Sep 17 00:00:00 2001 From: Xin He Date: Mon, 23 Mar 2026 17:06:27 +0800 Subject: [PATCH 4/4] Update requirements_vllm.txt --- test/test_cuda/requirements_vllm.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_cuda/requirements_vllm.txt b/test/test_cuda/requirements_vllm.txt index fbedb5bf7..b92578c9c 100644 --- a/test/test_cuda/requirements_vllm.txt +++ b/test/test_cuda/requirements_vllm.txt @@ -1,2 +1,3 @@ vllm lm_eval >= 0.4.10 +ray