From 503ca812cff0455aa2a59fbb8d62deed0ece0b87 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Mon, 14 Jul 2025 08:02:00 +0200
Subject: [PATCH 1/4] update

---
 tests/pipelines/wan/test_wan.py                | 17 +++++++++--------
 tests/pipelines/wan/test_wan_image_to_video.py | 13 ++++++++-----
 tests/pipelines/wan/test_wan_video_to_video.py | 13 ++++++++-----
 3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/tests/pipelines/wan/test_wan.py b/tests/pipelines/wan/test_wan.py
index 842b9d19b34e..fdb2d298356e 100644
--- a/tests/pipelines/wan/test_wan.py
+++ b/tests/pipelines/wan/test_wan.py
@@ -15,7 +15,6 @@
 import gc
 import unittest
 
-import numpy as np
 import torch
 from transformers import AutoTokenizer, T5EncoderModel
 
@@ -29,9 +28,7 @@
 )
 
 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
-from ..test_pipelines_common import (
-    PipelineTesterMixin,
-)
+from ..test_pipelines_common import PipelineTesterMixin
 
 
 enable_full_determinism()
@@ -127,11 +124,15 @@ def test_inference(self):
         inputs = self.get_dummy_inputs(device)
         video = pipe(**inputs).frames
         generated_video = video[0]
-
         self.assertEqual(generated_video.shape, (9, 3, 16, 16))
-        expected_video = torch.randn(9, 3, 16, 16)
-        max_diff = np.abs(generated_video - expected_video).max()
-        self.assertLessEqual(max_diff, 1e10)
+
+        # fmt: off
+        expected_slice = torch.tensor([0.4525, 0.452, 0.4485, 0.4534, 0.4524, 0.4529, 0.454, 0.453, 0.5127, 0.5326, 0.5204, 0.5253, 0.5439, 0.5424, 0.5133, 0.5078])
+        # fmt: on
+
+        generated_slice = generated_video.flatten()
+        generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
+        self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
 
     @unittest.skip("Test not supported")
     def test_attention_slicing_forward_pass(self):
diff --git a/tests/pipelines/wan/test_wan_image_to_video.py b/tests/pipelines/wan/test_wan_image_to_video.py
index 22dfef2eb0b1..6d1e1b93dfb3 100644
--- a/tests/pipelines/wan/test_wan_image_to_video.py
+++ b/tests/pipelines/wan/test_wan_image_to_video.py
@@ -14,7 +14,6 @@
 
 import unittest
 
-import numpy as np
 import torch
 from PIL import Image
 from transformers import (
@@ -147,11 +146,15 @@ def test_inference(self):
         inputs = self.get_dummy_inputs(device)
         video = pipe(**inputs).frames
         generated_video = video[0]
-
         self.assertEqual(generated_video.shape, (9, 3, 16, 16))
-        expected_video = torch.randn(9, 3, 16, 16)
-        max_diff = np.abs(generated_video - expected_video).max()
-        self.assertLessEqual(max_diff, 1e10)
+
+        # fmt: off
+        expected_slice = torch.tensor([0.4525, 0.4525, 0.4497, 0.4536, 0.452, 0.4529, 0.454, 0.4535, 0.5072, 0.5527, 0.5165, 0.5244, 0.5481, 0.5282, 0.5208, 0.5214])
+        # fmt: on
+
+        generated_slice = generated_video.flatten()
+        generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
+        self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
 
     @unittest.skip("Test not supported")
     def test_attention_slicing_forward_pass(self):
diff --git a/tests/pipelines/wan/test_wan_video_to_video.py b/tests/pipelines/wan/test_wan_video_to_video.py
index 11c748424a30..f4bb0960acee 100644
--- a/tests/pipelines/wan/test_wan_video_to_video.py
+++ b/tests/pipelines/wan/test_wan_video_to_video.py
@@ -14,7 +14,6 @@
 
 import unittest
 
-import numpy as np
 import torch
 from PIL import Image
 from transformers import AutoTokenizer, T5EncoderModel
@@ -123,11 +122,15 @@ def test_inference(self):
         inputs = self.get_dummy_inputs(device)
         video = pipe(**inputs).frames
         generated_video = video[0]
-
         self.assertEqual(generated_video.shape, (17, 3, 16, 16))
-        expected_video = torch.randn(17, 3, 16, 16)
-        max_diff = np.abs(generated_video - expected_video).max()
-        self.assertLessEqual(max_diff, 1e10)
+
+        # fmt: off
+        expected_slice = torch.tensor([0.4522, 0.4534, 0.4532, 0.4553, 0.4526, 0.4538, 0.4533, 0.4547, 0.513, 0.5176, 0.5286, 0.4958, 0.4955, 0.5381, 0.5154, 0.5195])
+        # fmt:on
+
+        generated_slice = generated_video.flatten()
+        generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
+        self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
 
     @unittest.skip("Test not supported")
     def test_attention_slicing_forward_pass(self):

From 2a6efc077bf443eddc34428a07c6a1671ee99615 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Fri, 18 Jul 2025 17:08:05 +0200
Subject: [PATCH 2/4] fix wan vace test slice

---
 tests/pipelines/wan/test_wan_image_to_video.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/pipelines/wan/test_wan_image_to_video.py b/tests/pipelines/wan/test_wan_image_to_video.py
index 6d1e1b93dfb3..0502697ff6da 100644
--- a/tests/pipelines/wan/test_wan_image_to_video.py
+++ b/tests/pipelines/wan/test_wan_image_to_video.py
@@ -149,7 +149,7 @@ def test_inference(self):
         self.assertEqual(generated_video.shape, (9, 3, 16, 16))
 
         # fmt: off
-        expected_slice = torch.tensor([0.4525, 0.4525, 0.4497, 0.4536, 0.452, 0.4529, 0.454, 0.4535, 0.5072, 0.5527, 0.5165, 0.5244, 0.5481, 0.5282, 0.5208, 0.5214])
+        expected_slice = torch.tensor([0.4531, 0.4527, 0.4498, 0.4542, 0.4526, 0.4527, 0.4534, 0.4534, 0.5061, 0.5185, 0.5283, 0.5181, 0.5309, 0.5365, 0.5113, 0.5244])
         # fmt: on
 
         generated_slice = generated_video.flatten()

From ff27aea9bc760ed8fc3a19b1f983e5d46eff4c10 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Fri, 18 Jul 2025 17:27:29 +0200
Subject: [PATCH 3/4] test

---
 tests/pipelines/wan/test_wan.py                | 1 +
 tests/pipelines/wan/test_wan_image_to_video.py | 1 +
 tests/pipelines/wan/test_wan_video_to_video.py | 1 +
 3 files changed, 3 insertions(+)

diff --git a/tests/pipelines/wan/test_wan.py b/tests/pipelines/wan/test_wan.py
index fdb2d298356e..4b506630a6c4 100644
--- a/tests/pipelines/wan/test_wan.py
+++ b/tests/pipelines/wan/test_wan.py
@@ -132,6 +132,7 @@ def test_inference(self):
 
         generated_slice = generated_video.flatten()
         generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
+        print("txt2video:", [round(x, 4) for x in generated_slice.tolist()])
         self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
 
     @unittest.skip("Test not supported")
diff --git a/tests/pipelines/wan/test_wan_image_to_video.py b/tests/pipelines/wan/test_wan_image_to_video.py
index 0502697ff6da..ac1f9c4f8555 100644
--- a/tests/pipelines/wan/test_wan_image_to_video.py
+++ b/tests/pipelines/wan/test_wan_image_to_video.py
@@ -154,6 +154,7 @@ def test_inference(self):
 
         generated_slice = generated_video.flatten()
         generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
+        print("image2video:", [round(x, 4) for x in generated_slice.tolist()])
         self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
 
     @unittest.skip("Test not supported")
diff --git a/tests/pipelines/wan/test_wan_video_to_video.py b/tests/pipelines/wan/test_wan_video_to_video.py
index f4bb0960acee..5dcb8611ab01 100644
--- a/tests/pipelines/wan/test_wan_video_to_video.py
+++ b/tests/pipelines/wan/test_wan_video_to_video.py
@@ -130,6 +130,7 @@ def test_inference(self):
 
         generated_slice = generated_video.flatten()
         generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
+        print("video2video:", [round(x, 4) for x in generated_slice.tolist()])
         self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
 
     @unittest.skip("Test not supported")

From c18b63c832ed1ef4d641e84e0a9c892cf98c95e3 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Fri, 18 Jul 2025 17:44:39 +0200
Subject: [PATCH 4/4] fix

---
 tests/pipelines/wan/test_wan.py              |  1 -
 .../pipelines/wan/test_wan_image_to_video.py | 52 +++++++++++++++++--
 .../pipelines/wan/test_wan_video_to_video.py |  1 -
 3 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/tests/pipelines/wan/test_wan.py b/tests/pipelines/wan/test_wan.py
index 4b506630a6c4..fdb2d298356e 100644
--- a/tests/pipelines/wan/test_wan.py
+++ b/tests/pipelines/wan/test_wan.py
@@ -132,7 +132,6 @@ def test_inference(self):
 
         generated_slice = generated_video.flatten()
         generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
-        print("txt2video:", [round(x, 4) for x in generated_slice.tolist()])
         self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
 
     @unittest.skip("Test not supported")
diff --git a/tests/pipelines/wan/test_wan_image_to_video.py b/tests/pipelines/wan/test_wan_image_to_video.py
index ac1f9c4f8555..6edc0cc882f7 100644
--- a/tests/pipelines/wan/test_wan_image_to_video.py
+++ b/tests/pipelines/wan/test_wan_image_to_video.py
@@ -149,12 +149,11 @@ def test_inference(self):
         self.assertEqual(generated_video.shape, (9, 3, 16, 16))
 
         # fmt: off
-        expected_slice = torch.tensor([0.4531, 0.4527, 0.4498, 0.4542, 0.4526, 0.4527, 0.4534, 0.4534, 0.5061, 0.5185, 0.5283, 0.5181, 0.5309, 0.5365, 0.5113, 0.5244])
+        expected_slice = torch.tensor([0.4525, 0.4525, 0.4497, 0.4536, 0.452, 0.4529, 0.454, 0.4535, 0.5072, 0.5527, 0.5165, 0.5244, 0.5481, 0.5282, 0.5208, 0.5214])
         # fmt: on
 
         generated_slice = generated_video.flatten()
         generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
-        print("image2video:", [round(x, 4) for x in generated_slice.tolist()])
         self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
 
     @unittest.skip("Test not supported")
@@ -166,7 +165,25 @@ def test_inference_batch_single_identical(self):
         pass
 
 
-class WanFLFToVideoPipelineFastTests(WanImageToVideoPipelineFastTests):
+class WanFLFToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
+    pipeline_class = WanImageToVideoPipeline
+    params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs", "height", "width"}
+    batch_params = TEXT_TO_IMAGE_BATCH_PARAMS
+    image_params = TEXT_TO_IMAGE_IMAGE_PARAMS
+    image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS
+    required_optional_params = frozenset(
+        [
+            "num_inference_steps",
+            "generator",
+            "latents",
+            "return_dict",
+            "callback_on_step_end",
+            "callback_on_step_end_tensor_inputs",
+        ]
+    )
+    test_xformers_attention = False
+    supports_dduf = False
+
     def get_dummy_components(self):
         torch.manual_seed(0)
         vae = AutoencoderKLWan(
@@ -251,3 +268,32 @@ def get_dummy_inputs(self, device, seed=0):
             "output_type": "pt",
         }
         return inputs
+
+    def test_inference(self):
+        device = "cpu"
+
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe.to(device)
+        pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        video = pipe(**inputs).frames
+        generated_video = video[0]
+        self.assertEqual(generated_video.shape, (9, 3, 16, 16))
+
+        # fmt: off
+        expected_slice = torch.tensor([0.4531, 0.4527, 0.4498, 0.4542, 0.4526, 0.4527, 0.4534, 0.4534, 0.5061, 0.5185, 0.5283, 0.5181, 0.5309, 0.5365, 0.5113, 0.5244])
+        # fmt: on
+
+        generated_slice = generated_video.flatten()
+        generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
+        self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
+
+    @unittest.skip("Test not supported")
+    def test_attention_slicing_forward_pass(self):
+        pass
+
+    @unittest.skip("TODO: revisit failing as it requires a very high threshold to pass")
+    def test_inference_batch_single_identical(self):
+        pass
diff --git a/tests/pipelines/wan/test_wan_video_to_video.py b/tests/pipelines/wan/test_wan_video_to_video.py
index 5dcb8611ab01..f4bb0960acee 100644
--- a/tests/pipelines/wan/test_wan_video_to_video.py
+++ b/tests/pipelines/wan/test_wan_video_to_video.py
@@ -130,7 +130,6 @@ def test_inference(self):
 
         generated_slice = generated_video.flatten()
         generated_slice = torch.cat([generated_slice[:8], generated_slice[-8:]])
-        print("video2video:", [round(x, 4) for x in generated_slice.tolist()])
         self.assertTrue(torch.allclose(generated_slice, expected_slice, atol=1e-3))
 
     @unittest.skip("Test not supported")