diff --git a/src/diffusers/hooks/taylorseer_cache.py b/src/diffusers/hooks/taylorseer_cache.py
index 3c5a606bd2ed..7cad9f4fa161 100644
--- a/src/diffusers/hooks/taylorseer_cache.py
+++ b/src/diffusers/hooks/taylorseer_cache.py
@@ -69,13 +69,15 @@ class TaylorSeerCacheConfig:
     - Patterns are matched using `re.fullmatch` on the module name.
     - If `skip_predict_identifiers` or `cache_identifiers` are provided, only matching modules are hooked.
     - If neither is provided, all attention-like modules are hooked by default.
-    - Example of inactive and active usage:
-    ```
-    def forward(x):
-        x = self.module1(x) # inactive module: returns zeros tensor based on shape recorded during full compute
-        x = self.module2(x) # active module: caches output here, avoiding recomputation of prior steps
-        return x
-    ```
+
+    Example of inactive and active usage:
+
+    ```py
+    def forward(x):
+        x = self.module1(x) # inactive module: returns zeros tensor based on shape recorded during full compute
+        x = self.module2(x) # active module: caches output here, avoiding recomputation of prior steps
+        return x
+    ```
     """
 
     cache_interval: int = 5
diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py
index 6be7618fcd5e..8628893200fe 100644
--- a/src/diffusers/utils/dummy_pt_objects.py
+++ b/src/diffusers/utils/dummy_pt_objects.py
@@ -257,6 +257,21 @@ def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["torch"])
 
 
+class TaylorSeerCacheConfig(metaclass=DummyObject):
+    _backends = ["torch"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch"])
+
+
 def apply_faster_cache(*args, **kwargs):
     requires_backends(apply_faster_cache, ["torch"])
 
@@ -273,6 +288,10 @@ def apply_pyramid_attention_broadcast(*args, **kwargs):
     requires_backends(apply_pyramid_attention_broadcast, ["torch"])
 
 
+def apply_taylorseer_cache(*args, **kwargs):
+    requires_backends(apply_taylorseer_cache, ["torch"])
+
+
 class AllegroTransformer3DModel(metaclass=DummyObject):
     _backends = ["torch"]
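For context, here is a minimal usage sketch of the public names this patch exposes. It assumes `apply_taylorseer_cache` follows the same `(module, config)` calling convention as the neighboring cache hooks (`apply_faster_cache`, `apply_pyramid_attention_broadcast`) and that the top-level exports match the dummy objects added above; the Flux pipeline, prompt, and step count are placeholders, not part of this PR.

```py
# Illustrative sketch only: the (module, config) call convention and the Flux
# pipeline below are assumptions mirroring the other diffusers cache hooks.
import torch

from diffusers import FluxPipeline, TaylorSeerCacheConfig, apply_taylorseer_cache

pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
pipe.to("cuda")

# cache_interval=5 is the documented default: a full forward pass every 5 steps,
# with cached/predicted module outputs reused on the steps in between.
config = TaylorSeerCacheConfig(cache_interval=5)
apply_taylorseer_cache(pipe.transformer, config)

image = pipe("An astronaut riding a horse on Mars", num_inference_steps=28).images[0]
image.save("taylorseer.png")
```

The `dummy_pt_objects.py` entries keep the import surface consistent: importing `TaylorSeerCacheConfig` or `apply_taylorseer_cache` without PyTorch installed raises a clear `requires_backends` error instead of a bare `ImportError`.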