From 562847df359339200daf58c03a7dedf2ebea0d0a Mon Sep 17 00:00:00 2001
From: Wenhua Cheng <wenhua.cheng@intel.com>
Date: Thu, 30 Apr 2026 22:33:35 +0800
Subject: [PATCH 1/2] Disable torch.compile of block_forward

Signed-off-by: Wenhua Cheng <wenhua.cheng@intel.com>
---
 auto_round/algorithms/quantization/base.py |  6 -----
 auto_round/compressors/base.py             | 26 +++++++++++-----------
 auto_round/compressors_new/base.py         |  8 +++----
 3 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/auto_round/algorithms/quantization/base.py b/auto_round/algorithms/quantization/base.py
index 12d972554..13739dbb0 100644
--- a/auto_round/algorithms/quantization/base.py
+++ b/auto_round/algorithms/quantization/base.py
@@ -377,12 +377,6 @@ def _resolve_block_forward(self):
             self.config.is_act_quantize and (not self.config.act_dynamic or self.config.is_act_nv_fp)
         ) or self.enable_alg_ext:
             self._resolved_block_forward = block_forward
-        elif self.compress_context.enable_torch_compile:
-            compiled = self.__dict__.get("_compiled_block_forward")
-            if compiled is None:
-                compiled = compile_func(block_forward, self.compress_context.device)
-                self._compiled_block_forward = compiled
-            self._resolved_block_forward = compiled
         else:
             self._resolved_block_forward = block_forward
         return self._resolved_block_forward
diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py
index 0503c8235..ed712b7b8 100644
--- a/auto_round/compressors/base.py
+++ b/auto_round/compressors/base.py
@@ -533,19 +533,19 @@ def __init__(
         self.enable_torch_compile = enable_torch_compile
         self._adjust_torch_compile(enable_torch_compile)
 
-        if (
-            (self.act_bits < 16 and (not self.act_dynamic or self.data_type == "nvfp"))  # have hooks
-            or self.enable_alg_ext  # Use imatrix
-            or not self.disable_opt_rtn  # Use imatrix
-        ):
-            self.block_forward = block_forward
-        else:
-            # TODO FIXME
-            # This function could not be compiled, causing a large accuracy drop when `enable_alg_ext` is used.
-            # To avoid issues, remove it in all scenarios except WOQ.
-            self.block_forward = (
-                compile_func(block_forward, self.device) if self.enable_torch_compile else block_forward
-            )
+        # if (
+        #     (self.act_bits < 16 and (not self.act_dynamic or self.data_type == "nvfp"))  # have hooks
+        #     or self.enable_alg_ext  # Use imatrix
+        #     or not self.disable_opt_rtn  # Use imatrix
+        # ):
+        #     self.block_forward = block_forward
+        # else:
+        #     # TODO FIXME
+        #     # This function could not be compiled, causing a large accuracy drop when `enable_alg_ext` is used.
+        #     # To avoid issues, remove it in all scenarios except WOQ.
+        #     self.block_forward = (
+        #         compile_func(block_forward, self.device) if self.enable_torch_compile else block_forward
+        #     )
 
         self._check_configs()
         torch.set_printoptions(precision=3, sci_mode=True)
diff --git a/auto_round/compressors_new/base.py b/auto_round/compressors_new/base.py
index 025dfbd3f..2ec1b33ef 100644
--- a/auto_round/compressors_new/base.py
+++ b/auto_round/compressors_new/base.py
@@ -971,10 +971,10 @@ def _hardware_setup(self) -> None:
         # Only compile block_forward when it will actually be used (calibration path).
         # For zero-shot compressors (need_calib=False), block_forward is never called,
         # so skipping compilation avoids unnecessary HPU workspace allocation.
-        if self.enable_torch_compile and not _needs_plain_forward and self.need_calib:
-            self.block_forward = compile_func(block_forward, self.compress_context.device)
-        else:
-            self.block_forward = block_forward
+        # if self.enable_torch_compile and not _needs_plain_forward and self.need_calib:
+        #     self.block_forward = compile_func(block_forward, self.compress_context.device)
+        # else:
+        #     self.block_forward = block_forward
         if self.compress_context.low_cpu_mem_usage:
             self._offloader.reset()
 

From 89501f458a5fabdf7051c7aad56790da234b4dc1 Mon Sep 17 00:00:00 2001
From: Wenhua Cheng <wenhua.cheng@intel.com>
Date: Thu, 30 Apr 2026 22:53:55 +0800
Subject: [PATCH 2/2] Assign uncompiled block_forward in both compressors

---
 auto_round/compressors/base.py     | 2 +-
 auto_round/compressors_new/base.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py
index ed712b7b8..03b76135a 100644
--- a/auto_round/compressors/base.py
+++ b/auto_round/compressors/base.py
@@ -546,7 +546,7 @@ def __init__(
         #     self.block_forward = (
         #         compile_func(block_forward, self.device) if self.enable_torch_compile else block_forward
         #     )
-
+        self.block_forward = block_forward
         self._check_configs()
         torch.set_printoptions(precision=3, sci_mode=True)
 
diff --git a/auto_round/compressors_new/base.py b/auto_round/compressors_new/base.py
index 2ec1b33ef..ca332a50a 100644
--- a/auto_round/compressors_new/base.py
+++ b/auto_round/compressors_new/base.py
@@ -975,6 +975,7 @@ def _hardware_setup(self) -> None:
         #     self.block_forward = compile_func(block_forward, self.compress_context.device)
         # else:
         #     self.block_forward = block_forward
+        self.block_forward = block_forward
         if self.compress_context.low_cpu_mem_usage:
             self._offloader.reset()