From ad0ca340681697999b52734124997bcae4cfb761 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Fri, 4 Aug 2023 17:59:36 +0000 Subject: [PATCH 01/56] Fuse loras --- src/diffusers/models/lora.py | 24 +++++++++++++++++++++++ src/diffusers/models/unet_2d_condition.py | 7 +++++++ 2 files changed, 31 insertions(+) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 171f1323cf84..cd366f96fa3a 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -16,8 +16,12 @@ import torch.nn.functional as F from torch import nn +import torch +from ..utils import logging +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + class LoRALinearLayer(nn.Module): def __init__(self, in_features, out_features, rank=4, network_alpha=None, device=None, dtype=None): super().__init__() @@ -116,6 +120,26 @@ def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs def set_lora_layer(self, lora_layer: Optional[LoRAConv2dLayer]): self.lora_layer = lora_layer + def _fuse_lora(self): + if self.lora_layer is None: + return + + dtype, device = self.weight.data.dtype, self.weight.data.device + logger.info(f"Fusing LoRA weights for {self.__class__}") + + w_orig = self.weight.data.float() + w_up = self.lora_layer.up.weight.data.float() + + w_down = self.lora_layer.down.weight.data.float() + if self.lora_layer.network_alpha is not None: + w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank + + fused_weight = w_orig + torch.bmm(w_up[None, :], w_down[None, :])[0] + self.weight.data = fused_weight.to(device=device, dtype=dtype) + + # we can drop the lora layer now + self.lora_layer = None + def forward(self, x): if self.lora_layer is None: return super().forward(x) diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py index cede2ed9d36a..a866f91c70c6 100644 --- a/src/diffusers/models/unet_2d_condition.py +++ b/src/diffusers/models/unet_2d_condition.py @@ -697,6 +697,13 @@ def _set_gradient_checkpointing(self, module, value=False): if isinstance(module, (CrossAttnDownBlock2D, DownBlock2D, CrossAttnUpBlock2D, UpBlock2D)): module.gradient_checkpointing = value + def fuse_lora(self): + self.apply(self._fuse_lora_apply) + + def _fuse_lora_apply(self, module): + if hasattr(module, "_fuse_lora"): + module._fuse_lora() + def forward( self, sample: torch.FloatTensor, From 697a6a79a23b4448e083f48b35cdd4d2c80cda28 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 24 Aug 2023 12:35:21 +0530 Subject: [PATCH 02/56] initial implementation. 
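This commit wires fuse_lora() / unfuse_lora() into the UNet loaders mixin so that the LoRA up/down matrices can be merged directly into the base Linear/Conv weights and restored again later. A rough usage sketch, assembled only from the calls exercised in the tests in this series (the checkpoint and LoRA file below are just the ones the slow tests use, not part of any public contract):

    from diffusers import DiffusionPipeline

    pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
    pipe.load_lora_weights(
        "hf-internal-testing/sdxl-1.0-lora",
        weight_name="sd_xl_offset_example-lora_1.0.safetensors",
    )

    # Merge W <- W + up @ down (up pre-scaled by network_alpha / rank when set)
    # into each LoRA-compatible layer, so inference no longer pays the extra
    # LoRA matmuls on every forward pass.
    pipe.unet.fuse_lora()
    image = pipe("masterpiece, best quality, mountain", num_inference_steps=2).images[0]

    # Subtract the cached (already scaled) up/down product to get the original
    # weights back.
    pipe.unet.unfuse_lora()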
--- src/diffusers/loaders.py | 14 +++++ src/diffusers/models/lora.py | 74 ++++++++++++++++++++++- src/diffusers/models/unet_2d_condition.py | 7 --- tests/models/test_lora_layers.py | 21 +++++++ 4 files changed, 107 insertions(+), 9 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 5e539b681aef..5caf343a7f19 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -573,6 +573,20 @@ def save_function(weights, filename): save_function(state_dict, os.path.join(save_directory, weight_name)) logger.info(f"Model weights saved in {os.path.join(save_directory, weight_name)}") + def fuse_lora(self): + self.apply(self._fuse_lora_apply) + + def _fuse_lora_apply(self, module): + if hasattr(module, "_fuse_lora"): + module._fuse_lora() + + def unfuse_lora(self): + self.apply(self._unfuse_lora_apply) + + def _unfuse_lora_apply(self, module): + if hasattr(module, "_unfuse_lora"): + module._unfuse_lora() + class TextualInversionLoaderMixin: r""" diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index cd366f96fa3a..0a3562b54892 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -12,16 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings from typing import Optional +import torch import torch.nn.functional as F from torch import nn -import torch + from ..utils import logging logger = logging.get_logger(__name__) # pylint: disable=invalid-name + class LoRALinearLayer(nn.Module): def __init__(self, in_features, out_features, rank=4, network_alpha=None, device=None, dtype=None): super().__init__() @@ -99,6 +102,52 @@ def __init__(self, *args, lora_layer: Optional[LoRAConv2dLayer] = None, **kwargs def set_lora_layer(self, lora_layer: Optional[LoRAConv2dLayer]): self.lora_layer = lora_layer + def _fuse_lora(self): + if self.lora_layer is None: + warnings.warn("Calling fuse_lora() is not supported. 
It will be a no-op.", RuntimeWarning) + return + + dtype, device = self.weight.data.dtype, self.weight.data.device + logger.info(f"Fusing LoRA weights for {self.__class__}") + + w_orig = self.weight.data.float() + w_up = self.lora_layer.up.weight.data.float() + w_down = self.lora_layer.down.weight.data.float() + + if self.lora_layer.network_alpha is not None: + w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank + + fusion = torch.mm(w_up.flatten(start_dim=1), w_down.flatten(start_dim=1)) + fusion = fusion.reshape((w_orig.shape)) + fused_weight = w_orig + fusion + self.weight.data = fused_weight.to(device=device, dtype=dtype) + + # we can drop the lora layer now + self.lora_layer = None + + # offload the up and down matrices to CPU to not blow the memory + self.w_up = w_up.cpu() + self.w_down = w_down.cpu() + + def _unfuse_lora(self): + if not (hasattr(self, "w_up") and hasattr(self, "w_down")): + return + logger.info(f"Unfusing LoRA weights for {self.__class__}") + + fused_weight = self.weight + dtype, device = fused_weight.data.dtype, fused_weight.data.device + + self.w_up = self.w_up.to(device=device, dtype=dtype) + self.w_down = self.w_down.to(device, dtype=dtype) + + fusion = torch.mm(self.w_up.flatten(start_dim=1), self.w_down.flatten(start_dim=1)) + fusion = fusion.reshape((fused_weight.shape)) + unfused_weight = fused_weight - fusion + self.weight.data = unfused_weight.to(device=device, dtype=dtype) + + self.w_up = None + self.down = None + def forward(self, x): if self.lora_layer is None: # make sure to the functional Conv2D function as otherwise torch.compile's graph will break @@ -122,6 +171,7 @@ def set_lora_layer(self, lora_layer: Optional[LoRAConv2dLayer]): def _fuse_lora(self): if self.lora_layer is None: + warnings.warn("Calling fuse_lora() is not supported. 
It will be a no-op.", RuntimeWarning) return dtype, device = self.weight.data.dtype, self.weight.data.device @@ -129,8 +179,8 @@ def _fuse_lora(self): w_orig = self.weight.data.float() w_up = self.lora_layer.up.weight.data.float() - w_down = self.lora_layer.down.weight.data.float() + if self.lora_layer.network_alpha is not None: w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank @@ -140,6 +190,26 @@ def _fuse_lora(self): # we can drop the lora layer now self.lora_layer = None + # offload the up and down matrices to CPU to not blow the memory + self.w_up = w_up.cpu() + self.w_down = w_down.cpu() + + def _unfuse_lora(self): + if not (hasattr(self, "w_up") and hasattr(self, "w_down")): + return + logger.info(f"Unfusing LoRA weights for {self.__class__}") + + fused_weight = self.weight + dtype, device = fused_weight.data.dtype, fused_weight.data.device + + self.w_up = self.w_up.to(device=device, dtype=dtype) + self.w_down = self.w_down.to(device, dtype=dtype) + unfused_weight = fused_weight - torch.bmm(self.w_up[None, :], self.w_down[None, :])[0] + self.weight.data = unfused_weight.to(device=device, dtype=dtype) + + self.w_up = None + self.w_down = None + def forward(self, x): if self.lora_layer is None: return super().forward(x) diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py index a866f91c70c6..cede2ed9d36a 100644 --- a/src/diffusers/models/unet_2d_condition.py +++ b/src/diffusers/models/unet_2d_condition.py @@ -697,13 +697,6 @@ def _set_gradient_checkpointing(self, module, value=False): if isinstance(module, (CrossAttnDownBlock2D, DownBlock2D, CrossAttnUpBlock2D, UpBlock2D)): module.gradient_checkpointing = value - def fuse_lora(self): - self.apply(self._fuse_lora_apply) - - def _fuse_lora_apply(self, module): - if hasattr(module, "_fuse_lora"): - module._fuse_lora() - def forward( self, sample: torch.FloatTensor, diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index 6bcc6e82c585..f3a202ba9025 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -679,6 +679,27 @@ def test_unload_lora_sdxl(self): orig_image_slice, orig_image_slice_two, atol=1e-3 ), "Unloading LoRA parameters should lead to results similar to what was obtained with the pipeline without any LoRA parameters." + def test_lora_fusion_warning(self): + pipeline_components, lora_components = self.get_dummy_components() + sd_pipe = StableDiffusionXLPipeline(**pipeline_components) + + # Emulate training. + set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) + + with tempfile.TemporaryDirectory() as tmpdirname: + StableDiffusionXLPipeline.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=lora_components["unet_lora_layers"], + text_encoder_lora_layers=None, + text_encoder_2_lora_layers=None, + safe_serialization=True, + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + sd_pipe.load_lora_weights(tmpdirname) + + with self.assertWarns(RuntimeWarning): + sd_pipe.unet.fuse_lora() + @slow @require_torch_gpu From 703e9aa220ba53e73c183b406cc42dfbfd9eab03 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 24 Aug 2023 13:19:00 +0530 Subject: [PATCH 03/56] add slow test one. 
--- tests/models/test_lora_layers.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index 5c372b02e0e2..edad0b4d44e6 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -963,3 +963,24 @@ def test_sdxl_1_0_lora(self): expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) self.assertTrue(np.allclose(images, expected, atol=1e-4)) + + def test_sdxl_1_0_lora_fusion(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_model_cpu_offload() + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.unet.fuse_lora() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + # This way we also test equivalence between LoRA fusion and the non-fusion behaviour. + expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + From f9a773709b8b9a6484f81b3fe62504d2cedb540b Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 24 Aug 2023 13:24:24 +0530 Subject: [PATCH 04/56] styling --- tests/models/test_lora_layers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index edad0b4d44e6..f0a0f4c194df 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -983,4 +983,3 @@ def test_sdxl_1_0_lora_fusion(self): expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) self.assertTrue(np.allclose(images, expected, atol=1e-4)) - From 15b7652f1fd4e0a1b1cfef7a4d922c0a8545fe10 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 24 Aug 2023 13:39:56 +0530 Subject: [PATCH 05/56] add: test for checking efficiency --- tests/models/test_lora_layers.py | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index f0a0f4c194df..1db2dddbdda9 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -14,6 +14,7 @@ # limitations under the License. 
import os import tempfile +import time import unittest import numpy as np @@ -983,3 +984,37 @@ def test_sdxl_1_0_lora_fusion(self): expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) self.assertTrue(np.allclose(images, expected, atol=1e-4)) + + def test_sdxl_1_0_lora_fusion_efficiency(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_model_cpu_offload() + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + start_time = time.time() + for _ in range(3): + pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + end_time = time.time() + elapsed_time_non_fusion = end_time - start_time + + del pipe + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_model_cpu_offload() + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.unet.fuse_lora() + + start_time = time.time() + for _ in range(3): + pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + end_time = time.time() + elapsed_time_fusion = end_time - start_time + + self.assertTrue(elapsed_time_fusion < elapsed_time_non_fusion) From b4a9a44583d91bfaa6c4442745e9629ae0c6bfe6 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 24 Aug 2023 13:50:46 +0530 Subject: [PATCH 06/56] print --- tests/models/test_lora_layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index 1db2dddbdda9..b2755fc5f7da 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -1016,5 +1016,5 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): ).images end_time = time.time() elapsed_time_fusion = end_time - start_time - + print(f"elapsed_time_fusion: {elapsed_time_fusion}, elapsed_time_non_fusion: {elapsed_time_non_fusion}") self.assertTrue(elapsed_time_fusion < elapsed_time_non_fusion) From a6d64024329875344c8f13c15e18f91f608a565d Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 24 Aug 2023 14:04:59 +0530 Subject: [PATCH 07/56] position --- tests/models/test_lora_layers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index b2755fc5f7da..43c8021a4062 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -987,12 +987,12 @@ def test_sdxl_1_0_lora_fusion(self): def test_sdxl_1_0_lora_fusion_efficiency(self): generator = torch.Generator().manual_seed(0) - - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") - pipe.enable_model_cpu_offload() lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() start_time = time.time() for _ in range(3): @@ -1005,9 +1005,9 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): del pipe pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") - pipe.enable_model_cpu_offload() pipe.load_lora_weights(lora_model_id, 
weight_name=lora_filename) pipe.unet.fuse_lora() + pipe.enable_model_cpu_offload() start_time = time.time() for _ in range(3): From a167a744cfc3f322fffe89dbf449a9a53f8a920e Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 24 Aug 2023 14:16:35 +0530 Subject: [PATCH 08/56] place model offload correctly --- tests/models/test_lora_layers.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index 43c8021a4062..dd7e22b87c7a 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -897,10 +897,10 @@ def test_sdxl_0_9_lora_one(self): generator = torch.Generator().manual_seed(0) pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") - pipe.enable_model_cpu_offload() lora_model_id = "hf-internal-testing/sdxl-0.9-daiton-lora" lora_filename = "daiton-xl-lora-test.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 @@ -915,10 +915,10 @@ def test_sdxl_0_9_lora_two(self): generator = torch.Generator().manual_seed(0) pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") - pipe.enable_model_cpu_offload() lora_model_id = "hf-internal-testing/sdxl-0.9-costumes-lora" lora_filename = "saijo.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 @@ -933,10 +933,10 @@ def test_sdxl_0_9_lora_three(self): generator = torch.Generator().manual_seed(0) pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") - pipe.enable_model_cpu_offload() lora_model_id = "hf-internal-testing/sdxl-0.9-kamepan-lora" lora_filename = "kame_sdxl_v2-000020-16rank.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 @@ -951,10 +951,10 @@ def test_sdxl_1_0_lora(self): generator = torch.Generator().manual_seed(0) pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") - pipe.enable_model_cpu_offload() lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 @@ -969,11 +969,11 @@ def test_sdxl_1_0_lora_fusion(self): generator = torch.Generator().manual_seed(0) pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") - pipe.enable_model_cpu_offload() lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) pipe.unet.fuse_lora() + pipe.enable_model_cpu_offload() images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 @@ -1016,5 +1016,5 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): ).images end_time = time.time() elapsed_time_fusion = end_time - start_time - print(f"elapsed_time_fusion: {elapsed_time_fusion}, 
elapsed_time_non_fusion: {elapsed_time_non_fusion}") + self.assertTrue(elapsed_time_fusion < elapsed_time_non_fusion) From 14aa4239e9206a09b43884f121b7e32519c77dee Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 24 Aug 2023 14:29:04 +0530 Subject: [PATCH 09/56] style --- tests/models/test_lora_layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index dd7e22b87c7a..e92f7bdd44ed 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -1016,5 +1016,5 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): ).images end_time = time.time() elapsed_time_fusion = end_time - start_time - + self.assertTrue(elapsed_time_fusion < elapsed_time_non_fusion) From 16311f78b2ab22382b25b40a6b5a9286757e0c97 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 24 Aug 2023 14:44:10 +0530 Subject: [PATCH 10/56] style. --- src/diffusers/models/lora.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 9b8822f74faa..c14a9f04115e 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -128,7 +128,7 @@ def _unfuse_lora(self): return logger.info(f"Unfusing LoRA weights for {self.__class__}") - fused_weight = self.weight + fused_weight = self.weight.data dtype, device = fused_weight.data.dtype, fused_weight.data.device self.w_up = self.w_up.to(device=device, dtype=dtype) @@ -193,8 +193,8 @@ def _unfuse_lora(self): return logger.info(f"Unfusing LoRA weights for {self.__class__}") - fused_weight = self.weight - dtype, device = fused_weight.data.dtype, fused_weight.data.device + fused_weight = self.weight.data + dtype, device = fused_weight.dtype, fused_weight.device self.w_up = self.w_up.to(device=device, dtype=dtype) self.w_down = self.w_down.to(device, dtype=dtype) From a355544648c68a78c7f95169a492919f38612cd8 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 24 Aug 2023 14:59:56 +0530 Subject: [PATCH 11/56] unfuse test. 
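The round trip being tested is, in isolation, the following (a minimal sketch with made-up shapes; it only mirrors the _fuse_lora / _unfuse_lora arithmetic from this series):

    import torch

    out_features, in_features, rank, alpha = 8, 16, 4, 4.0
    w = torch.randn(out_features, in_features)    # base Linear weight
    w_up = torch.randn(out_features, rank)
    w_down = torch.randn(rank, in_features)

    w_up = w_up * alpha / rank                    # network_alpha scaling
    fused = w + torch.mm(w_up, w_down)            # what _fuse_lora writes into weight.data
    restored = fused - torch.mm(w_up, w_down)     # what _unfuse_lora rebuilds from the
                                                  # cached w_up / w_down
    assert torch.allclose(w, restored, atol=1e-5)

Since the fused weights include the LoRA update and the restored weights do not, images generated before and after unfuse_lora() should differ, which is what the slice comparison in the test below asserts.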
--- tests/models/test_lora_layers.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index e92f7bdd44ed..5668b8655d18 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -985,6 +985,30 @@ def test_sdxl_1_0_lora_fusion(self): self.assertTrue(np.allclose(images, expected, atol=1e-4)) + def test_sdxl_1_0_lora_unfusion(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.unet.fuse_lora() + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images_with_fusion = images[0, -3:, -3:, -1].flatten() + + pipe.unet.unfuse_lora() + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + images_without_fusion = images[0, -3:, -3:, -1].flatten() + + self.assertFalse(np.allclose(images_with_fusion, images_without_fusion)) + def test_sdxl_1_0_lora_fusion_efficiency(self): generator = torch.Generator().manual_seed(0) lora_model_id = "hf-internal-testing/sdxl-1.0-lora" From d8050b5eff6722c3b99bee103865c398c93a545f Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 24 Aug 2023 15:28:27 +0530 Subject: [PATCH 12/56] final checks --- tests/models/test_lora_layers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index 5668b8655d18..5f055ea75ad0 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -998,7 +998,6 @@ def test_sdxl_1_0_lora_unfusion(self): images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 ).images - images_with_fusion = images[0, -3:, -3:, -1].flatten() pipe.unet.unfuse_lora() From a976466515258ea18ef11c80a60e3c45ed87e156 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 10:57:22 +0530 Subject: [PATCH 13/56] remove warning test --- tests/models/test_lora_layers.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index 4b299f2ba62b..edb4a4d12d57 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -655,27 +655,6 @@ def test_unload_lora_sdxl(self): orig_image_slice, orig_image_slice_two, atol=1e-3 ), "Unloading LoRA parameters should lead to results similar to what was obtained with the pipeline without any LoRA parameters." - def test_lora_fusion_warning(self): - pipeline_components, lora_components = self.get_dummy_components() - sd_pipe = StableDiffusionXLPipeline(**pipeline_components) - - # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - - with tempfile.TemporaryDirectory() as tmpdirname: - StableDiffusionXLPipeline.save_lora_weights( - save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=None, - text_encoder_2_lora_layers=None, - safe_serialization=True, - ) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - sd_pipe.load_lora_weights(tmpdirname) - - with self.assertWarns(RuntimeWarning): - sd_pipe.unet.fuse_lora() - def test_load_lora_locally(self): pipeline_components, lora_components = self.get_dummy_components() sd_pipe = StableDiffusionXLPipeline(**pipeline_components) From b7189228a0988f5ea56a28bfdd0abdebf60c2be7 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 10:59:07 +0530 Subject: [PATCH 14/56] remove warnings altogether --- src/diffusers/models/lora.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 30c437cfe544..11249fe7473f 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -99,7 +99,6 @@ def set_lora_layer(self, lora_layer: Optional[LoRAConv2dLayer]): def _fuse_lora(self): if self.lora_layer is None: - logger.warn("Calling fuse_lora() is not supported. It will be a no-op.") return dtype, device = self.weight.data.dtype, self.weight.data.device @@ -166,7 +165,6 @@ def set_lora_layer(self, lora_layer: Optional[LoRAConv2dLayer]): def _fuse_lora(self): if self.lora_layer is None: - logger.warn("Calling fuse_lora() is not supported. It will be a no-op.") return dtype, device = self.weight.data.dtype, self.weight.data.device From 782367d83f3f12df96ffe25bebea5ce272637645 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 11:34:00 +0530 Subject: [PATCH 15/56] debugging --- src/diffusers/models/lora.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 11249fe7473f..87c123d5ed1d 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -140,10 +140,11 @@ def _unfuse_lora(self): self.weight.data = unfused_weight.to(device=device, dtype=dtype) self.w_up = None - self.down = None + self.w_down = None def forward(self, x): if self.lora_layer is None: + print(self.w_up.shape, self.w_down.shape) # make sure to the functional Conv2D function as otherwise torch.compile's graph will break # see: https://github.com/huggingface/diffusers/pull/4315 return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) @@ -205,6 +206,7 @@ def _unfuse_lora(self): def forward(self, hidden_states, lora_scale: int = 1): if self.lora_layer is None: + print(self.w_up.shape, self.w_down.shape) return super().forward(hidden_states) else: return super().forward(hidden_states) + lora_scale * self.lora_layer(hidden_states) From 84f63e823967f3394c483490f3500b2abe68f99b Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 11:45:05 +0530 Subject: [PATCH 16/56] tighten up tests. 
--- tests/models/test_lora_layers.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index edb4a4d12d57..14fcf4a2aace 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -989,6 +989,35 @@ def test_sdxl_1_0_lora_unfusion(self): self.assertFalse(np.allclose(images_with_fusion, images_without_fusion)) + def test_sdxl_1_0_lora_unfusion_effectivity(self): + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_model_cpu_offload() + + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + original_image_slice = images[0, -3:, -3:, -1].flatten() + + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.unet.fuse_lora() + + generator = torch.Generator().manual_seed(0) + _ = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + pipe.unet.unfuse_lora() + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + images_without_fusion_slice = images[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(original_image_slice, images_without_fusion_slice, atol=1e-3)) + def test_sdxl_1_0_lora_fusion_efficiency(self): generator = torch.Generator().manual_seed(0) lora_model_id = "hf-internal-testing/sdxl-1.0-lora" From 4a2e6c41d32951f272e4b9be5f60527c7ad63e9c Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 11:46:18 +0530 Subject: [PATCH 17/56] debugging --- src/diffusers/models/lora.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 87c123d5ed1d..960e3fd72c33 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -144,7 +144,8 @@ def _unfuse_lora(self): def forward(self, x): if self.lora_layer is None: - print(self.w_up.shape, self.w_down.shape) + if hasattr(self, "w_up"): + print(self.w_up.shape, self.w_down.shape) # make sure to the functional Conv2D function as otherwise torch.compile's graph will break # see: https://github.com/huggingface/diffusers/pull/4315 return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) From 1b07e43bc6740f87c9aaa1902d1889035bf658c2 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 11:49:25 +0530 Subject: [PATCH 18/56] debugging --- src/diffusers/models/lora.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 960e3fd72c33..a05d6ce6cf38 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -188,6 +188,7 @@ def _fuse_lora(self): # offload the up and down matrices to CPU to not blow the memory self.w_up = w_up.cpu() self.w_down = w_down.cpu() + print(f"From {self.__class__}: {self.w_up.shape}, {self.w_down.shape}") def _unfuse_lora(self): if not (hasattr(self, "w_up") and hasattr(self, "w_down")): From 0d69dded32522d1130c51179d795a4b4bf6ff38d Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 11:56:49 +0530 Subject: [PATCH 19/56] debugging --- src/diffusers/models/lora.py | 
3 +++ 1 file changed, 3 insertions(+) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index a05d6ce6cf38..f92630a3c79e 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -161,6 +161,7 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) self.lora_layer = lora_layer + self.hello = torch.tensor([0.20]).cpu() def set_lora_layer(self, lora_layer: Optional[LoRAConv2dLayer]): self.lora_layer = lora_layer @@ -188,6 +189,7 @@ def _fuse_lora(self): # offload the up and down matrices to CPU to not blow the memory self.w_up = w_up.cpu() self.w_down = w_down.cpu() + self.hello = self.hello.cpu() print(f"From {self.__class__}: {self.w_up.shape}, {self.w_down.shape}") def _unfuse_lora(self): @@ -208,6 +210,7 @@ def _unfuse_lora(self): def forward(self, hidden_states, lora_scale: int = 1): if self.lora_layer is None: + print(self.hello.shape) print(self.w_up.shape, self.w_down.shape) return super().forward(hidden_states) else: From caa79ed18126144c944ef3ee0d9c4e39e3657a07 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 11:59:14 +0530 Subject: [PATCH 20/56] debugging --- src/diffusers/models/lora.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index f92630a3c79e..cf645d65afd0 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -161,7 +161,6 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) self.lora_layer = lora_layer - self.hello = torch.tensor([0.20]).cpu() def set_lora_layer(self, lora_layer: Optional[LoRAConv2dLayer]): self.lora_layer = lora_layer @@ -189,8 +188,7 @@ def _fuse_lora(self): # offload the up and down matrices to CPU to not blow the memory self.w_up = w_up.cpu() self.w_down = w_down.cpu() - self.hello = self.hello.cpu() - print(f"From {self.__class__}: {self.w_up.shape}, {self.w_down.shape}") + self.hello = torch.tensor([20]).cpu() def _unfuse_lora(self): if not (hasattr(self, "w_up") and hasattr(self, "w_down")): From 345057d379027bcab12431f067800da60e4dfc93 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 12:02:23 +0530 Subject: [PATCH 21/56] debugging --- src/diffusers/models/lora.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index cf645d65afd0..c959fa4f493d 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -161,6 +161,7 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) self.lora_layer = lora_layer + self.hello = None def set_lora_layer(self, lora_layer: Optional[LoRAConv2dLayer]): self.lora_layer = lora_layer From f26a62a1d6fd21984c52612bd89e7a7ef4815e4d Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 12:09:59 +0530 Subject: [PATCH 22/56] debugging --- src/diffusers/models/lora.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index c959fa4f493d..c14b0462858b 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -182,6 +182,7 @@ def _fuse_lora(self): fused_weight = w_orig + torch.bmm(w_up[None, :], w_down[None, :])[0] self.weight.data = fused_weight.to(device=device, 
dtype=dtype) + print(self.weight.data.shape) # we can drop the lora layer now self.lora_layer = None From bc2282cd317457207cef0c782d48cbb437ea4fbb Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 12:13:45 +0530 Subject: [PATCH 23/56] debugging --- src/diffusers/models/lora.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index c14b0462858b..d38a1c10c3e7 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -161,7 +161,6 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) self.lora_layer = lora_layer - self.hello = None def set_lora_layer(self, lora_layer: Optional[LoRAConv2dLayer]): self.lora_layer = lora_layer @@ -182,7 +181,6 @@ def _fuse_lora(self): fused_weight = w_orig + torch.bmm(w_up[None, :], w_down[None, :])[0] self.weight.data = fused_weight.to(device=device, dtype=dtype) - print(self.weight.data.shape) # we can drop the lora layer now self.lora_layer = None @@ -190,7 +188,7 @@ def _fuse_lora(self): # offload the up and down matrices to CPU to not blow the memory self.w_up = w_up.cpu() self.w_down = w_down.cpu() - self.hello = torch.tensor([20]).cpu() + setattr(self, "hello", torch.tensor([20]).cpu()) def _unfuse_lora(self): if not (hasattr(self, "w_up") and hasattr(self, "w_down")): From ced2a90346f31fa7567de0f520f40f35d6948a3a Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 12:19:46 +0530 Subject: [PATCH 24/56] debugging --- src/diffusers/models/lora.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index d38a1c10c3e7..cf645d65afd0 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -188,7 +188,7 @@ def _fuse_lora(self): # offload the up and down matrices to CPU to not blow the memory self.w_up = w_up.cpu() self.w_down = w_down.cpu() - setattr(self, "hello", torch.tensor([20]).cpu()) + self.hello = torch.tensor([20]).cpu() def _unfuse_lora(self): if not (hasattr(self, "w_up") and hasattr(self, "w_down")): From ff78a58b7b3bd033ed0f5c53e10049ef2659bd4a Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 12:24:25 +0530 Subject: [PATCH 25/56] debugging --- src/diffusers/models/lora.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index cf645d65afd0..f40699eb7496 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -164,6 +164,7 @@ def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs def set_lora_layer(self, lora_layer: Optional[LoRAConv2dLayer]): self.lora_layer = lora_layer + self.hello = torch.tensor([20]).cpu() def _fuse_lora(self): if self.lora_layer is None: @@ -188,7 +189,6 @@ def _fuse_lora(self): # offload the up and down matrices to CPU to not blow the memory self.w_up = w_up.cpu() self.w_down = w_down.cpu() - self.hello = torch.tensor([20]).cpu() def _unfuse_lora(self): if not (hasattr(self, "w_up") and hasattr(self, "w_down")): From 036a9bca7fd915453406e06254bb012c63ab1313 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 12:26:39 +0530 Subject: [PATCH 26/56] denugging --- src/diffusers/models/lora.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index f40699eb7496..c9d526cb5816 
100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -161,8 +161,9 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) self.lora_layer = lora_layer + self.hello = None - def set_lora_layer(self, lora_layer: Optional[LoRAConv2dLayer]): + def set_lora_layer(self, lora_layer: Optional[LoRALinearLayer]): self.lora_layer = lora_layer self.hello = torch.tensor([20]).cpu() From 1c6970ff4e22e96d20ff513257619246ec2d9811 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 12:27:45 +0530 Subject: [PATCH 27/56] debugging --- src/diffusers/models/lora.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index c9d526cb5816..4ede93e9ecbc 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -165,7 +165,7 @@ def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs def set_lora_layer(self, lora_layer: Optional[LoRALinearLayer]): self.lora_layer = lora_layer - self.hello = torch.tensor([20]).cpu() + self.hello = torch.tensor([20]) def _fuse_lora(self): if self.lora_layer is None: From 9f7492f6a48b5c9c99b78c51bbf99231913b34e5 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 12:29:30 +0530 Subject: [PATCH 28/56] debugging --- src/diffusers/models/lora.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 4ede93e9ecbc..f4be056c3a12 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -209,6 +209,7 @@ def _unfuse_lora(self): def forward(self, hidden_states, lora_scale: int = 1): if self.lora_layer is None: + print(self.weight.data.shape) print(self.hello.shape) print(self.w_up.shape, self.w_down.shape) return super().forward(hidden_states) From 975adf7789866441ee4a3d86b56546cc9a21c65b Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 12:52:19 +0530 Subject: [PATCH 29/56] debugging --- src/diffusers/models/lora.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index f4be056c3a12..a9e595355945 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -208,6 +208,10 @@ def _unfuse_lora(self): self.w_down = None def forward(self, hidden_states, lora_scale: int = 1): + all_members = dir(self) + # Filter out methods to get only attributes + attributes = [attr for attr in all_members if not callable(getattr(self, attr)) and not attr.startswith("__")] + print(attributes) if self.lora_layer is None: print(self.weight.data.shape) print(self.hello.shape) From 0abf6fe96cb6097460f16c77dc77e5861ddcb62d Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 12:54:36 +0530 Subject: [PATCH 30/56] debugging --- src/diffusers/models/lora.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index a9e595355945..35262870a9ae 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -161,11 +161,9 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) self.lora_layer = lora_layer - self.hello = None def set_lora_layer(self, lora_layer: Optional[LoRALinearLayer]): self.lora_layer = lora_layer - self.hello = torch.tensor([20]) def _fuse_lora(self): 
if self.lora_layer is None: @@ -210,7 +208,7 @@ def _unfuse_lora(self): def forward(self, hidden_states, lora_scale: int = 1): all_members = dir(self) # Filter out methods to get only attributes - attributes = [attr for attr in all_members if not callable(getattr(self, attr)) and not attr.startswith("__")] + attributes = [attr for attr in all_members if not callable(getattr(self, attr)) and not attr.startswith(("__", "_"))] print(attributes) if self.lora_layer is None: print(self.weight.data.shape) From de916c66578147ae820ce6252073d7c2698a1467 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 12:58:11 +0530 Subject: [PATCH 31/56] debugging --- src/diffusers/models/lora.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 35262870a9ae..b235cfae9d2b 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -169,6 +169,7 @@ def _fuse_lora(self): if self.lora_layer is None: return + print(f"Before fusion: {self.lora_layer}") dtype, device = self.weight.data.dtype, self.weight.data.device logger.info(f"Fusing LoRA weights for {self.__class__}") @@ -184,6 +185,7 @@ def _fuse_lora(self): # we can drop the lora layer now self.lora_layer = None + print(f"After fusion: {self.lora_layer}") # offload the up and down matrices to CPU to not blow the memory self.w_up = w_up.cpu() @@ -206,8 +208,8 @@ def _unfuse_lora(self): self.w_down = None def forward(self, hidden_states, lora_scale: int = 1): - all_members = dir(self) # Filter out methods to get only attributes + all_members = dir(self) attributes = [attr for attr in all_members if not callable(getattr(self, attr)) and not attr.startswith(("__", "_"))] print(attributes) if self.lora_layer is None: From e87e5dd804648fd9546aefcaaf68fc800e200aac Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 13:00:41 +0530 Subject: [PATCH 32/56] debugging --- src/diffusers/models/lora.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index b235cfae9d2b..be62df07cf86 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -161,15 +161,17 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) self.lora_layer = lora_layer + self.hello = None def set_lora_layer(self, lora_layer: Optional[LoRALinearLayer]): self.lora_layer = lora_layer + self.hello = torch.tensor([20]) def _fuse_lora(self): if self.lora_layer is None: return - print(f"Before fusion: {self.lora_layer}") + print(f"Before fusion: {self.lora_layer.__class__}") dtype, device = self.weight.data.dtype, self.weight.data.device logger.info(f"Fusing LoRA weights for {self.__class__}") @@ -186,6 +188,7 @@ def _fuse_lora(self): # we can drop the lora layer now self.lora_layer = None print(f"After fusion: {self.lora_layer}") + print(self.hello) # offload the up and down matrices to CPU to not blow the memory self.w_up = w_up.cpu() @@ -213,7 +216,6 @@ def forward(self, hidden_states, lora_scale: int = 1): attributes = [attr for attr in all_members if not callable(getattr(self, attr)) and not attr.startswith(("__", "_"))] print(attributes) if self.lora_layer is None: - print(self.weight.data.shape) print(self.hello.shape) print(self.w_up.shape, self.w_down.shape) return super().forward(hidden_states) From e376b5816278d9647c0284ec2fa45f709375bcf2 Mon Sep 17 00:00:00 2001 From: 
Sayak Paul Date: Mon, 28 Aug 2023 13:03:13 +0530 Subject: [PATCH 33/56] debugging --- src/diffusers/models/lora.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index be62df07cf86..bd9bf22f7fd2 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -191,8 +191,8 @@ def _fuse_lora(self): print(self.hello) # offload the up and down matrices to CPU to not blow the memory - self.w_up = w_up.cpu() - self.w_down = w_down.cpu() + # self.w_up = w_up.cpu() + # self.w_down = w_down.cpu() def _unfuse_lora(self): if not (hasattr(self, "w_up") and hasattr(self, "w_down")): From 21f17b07b10c0d6df4dfbc88a8da1ab175175011 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 13:12:26 +0530 Subject: [PATCH 34/56] debugging --- src/diffusers/models/lora.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index bd9bf22f7fd2..892c8840e155 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -187,12 +187,12 @@ def _fuse_lora(self): # we can drop the lora layer now self.lora_layer = None - print(f"After fusion: {self.lora_layer}") - print(self.hello) + self.hello = torch.tensor([30]) + print(f"After fusion: {self.lora_layer}, {self.hello}") # offload the up and down matrices to CPU to not blow the memory - # self.w_up = w_up.cpu() - # self.w_down = w_down.cpu() + self.w_up = w_up.cpu() + self.w_down = w_down.cpu() def _unfuse_lora(self): if not (hasattr(self, "w_up") and hasattr(self, "w_down")): @@ -211,10 +211,6 @@ def _unfuse_lora(self): self.w_down = None def forward(self, hidden_states, lora_scale: int = 1): - # Filter out methods to get only attributes - all_members = dir(self) - attributes = [attr for attr in all_members if not callable(getattr(self, attr)) and not attr.startswith(("__", "_"))] - print(attributes) if self.lora_layer is None: print(self.hello.shape) print(self.w_up.shape, self.w_down.shape) From 553730504b9355be43fb98d25ba1033209144fd3 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 13:18:34 +0530 Subject: [PATCH 35/56] debugging --- src/diffusers/models/lora.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 892c8840e155..8b06b4ea3c0d 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -186,8 +186,8 @@ def _fuse_lora(self): self.weight.data = fused_weight.to(device=device, dtype=dtype) # we can drop the lora layer now - self.lora_layer = None self.hello = torch.tensor([30]) + self.lora_layer = None print(f"After fusion: {self.lora_layer}, {self.hello}") # offload the up and down matrices to CPU to not blow the memory From 73c07eeebb59fea4f599a2c1b30f6a63c30533c3 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 13:25:02 +0530 Subject: [PATCH 36/56] debugging --- src/diffusers/models/lora.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 8b06b4ea3c0d..cb0b3b45eb69 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -144,8 +144,6 @@ def _unfuse_lora(self): def forward(self, x): if self.lora_layer is None: - if hasattr(self, "w_up"): - print(self.w_up.shape, self.w_down.shape) # make sure to the functional Conv2D function as otherwise torch.compile's graph will break # see: 
https://github.com/huggingface/diffusers/pull/4315 return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) @@ -161,17 +159,14 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) self.lora_layer = lora_layer - self.hello = None def set_lora_layer(self, lora_layer: Optional[LoRALinearLayer]): self.lora_layer = lora_layer - self.hello = torch.tensor([20]) def _fuse_lora(self): if self.lora_layer is None: return - print(f"Before fusion: {self.lora_layer.__class__}") dtype, device = self.weight.data.dtype, self.weight.data.device logger.info(f"Fusing LoRA weights for {self.__class__}") @@ -186,9 +181,7 @@ def _fuse_lora(self): self.weight.data = fused_weight.to(device=device, dtype=dtype) # we can drop the lora layer now - self.hello = torch.tensor([30]) self.lora_layer = None - print(f"After fusion: {self.lora_layer}, {self.hello}") # offload the up and down matrices to CPU to not blow the memory self.w_up = w_up.cpu() @@ -212,8 +205,6 @@ def _unfuse_lora(self): def forward(self, hidden_states, lora_scale: int = 1): if self.lora_layer is None: - print(self.hello.shape) - print(self.w_up.shape, self.w_down.shape) return super().forward(hidden_states) else: return super().forward(hidden_states) + lora_scale * self.lora_layer(hidden_states) From dc30b9dadff996bc489f0e9fe6b2a82df4910bad Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 13:30:24 +0530 Subject: [PATCH 37/56] debugging --- src/diffusers/loaders.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index a2f0f113bed1..14ca0688f151 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1721,6 +1721,7 @@ def unload_lora_weights(self): """ for _, module in self.unet.named_modules(): if hasattr(module, "set_lora_layer"): + print(f"Called from unload_lora_weights(): {module.lora_layer}") module.set_lora_layer(None) # Safe to call the following regardless of LoRA. 
From 96c70e8003f5f62b11d6794e6680561dea423ad7 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 13:37:04 +0530 Subject: [PATCH 38/56] debugging --- src/diffusers/models/lora.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index cb0b3b45eb69..7ccebd36cb82 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -158,6 +158,7 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) + self.weight_original = self.weight.data.clone() self.lora_layer = lora_layer def set_lora_layer(self, lora_layer: Optional[LoRALinearLayer]): @@ -205,6 +206,7 @@ def _unfuse_lora(self): def forward(self, hidden_states, lora_scale: int = 1): if self.lora_layer is None: + print(f"Check if the original weights differ: {torch.allclose(self.weight_original, self.weight.data)}") return super().forward(hidden_states) else: return super().forward(hidden_states) + lora_scale * self.lora_layer(hidden_states) From c5f37b5142a4100a1e34113946b17ac075dfc9f3 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 13:40:43 +0530 Subject: [PATCH 39/56] debugging --- src/diffusers/models/lora.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 7ccebd36cb82..21821bf750db 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -206,6 +206,8 @@ def _unfuse_lora(self): def forward(self, hidden_states, lora_scale: int = 1): if self.lora_layer is None: + if self.weight_original.dtype != self.weight.data.dtype: + self.weight_original = self.weight_original.to(dtype=self.weight.data.dtype) print(f"Check if the original weights differ: {torch.allclose(self.weight_original, self.weight.data)}") return super().forward(hidden_states) else: From 854231bddac2e380087d15505d0f7e200d10d3cf Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 13:42:55 +0530 Subject: [PATCH 40/56] debugging --- src/diffusers/models/lora.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 21821bf750db..b14b420b0fc9 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -158,7 +158,7 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) - self.weight_original = self.weight.data.clone() + self.weight_original = None self.lora_layer = lora_layer def set_lora_layer(self, lora_layer: Optional[LoRALinearLayer]): @@ -172,6 +172,7 @@ def _fuse_lora(self): logger.info(f"Fusing LoRA weights for {self.__class__}") w_orig = self.weight.data.float() + self.weight_original = w_orig.clone() w_up = self.lora_layer.up.weight.data.float() w_down = self.lora_layer.down.weight.data.float() @@ -206,8 +207,6 @@ def _unfuse_lora(self): def forward(self, hidden_states, lora_scale: int = 1): if self.lora_layer is None: - if self.weight_original.dtype != self.weight.data.dtype: - self.weight_original = self.weight_original.to(dtype=self.weight.data.dtype) print(f"Check if the original weights differ: {torch.allclose(self.weight_original, self.weight.data)}") return super().forward(hidden_states) else: From b00899ed3b0183e5d70cd4be5d0f2a6d39119dd4 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 13:45:36 +0530 Subject: [PATCH 41/56] debuging --- 
src/diffusers/models/lora.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index b14b420b0fc9..c1de9714cfc0 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -158,7 +158,7 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) - self.weight_original = None + self.weight_original = self.weight.data.clone() self.lora_layer = lora_layer def set_lora_layer(self, lora_layer: Optional[LoRALinearLayer]): @@ -207,6 +207,7 @@ def _unfuse_lora(self): def forward(self, hidden_states, lora_scale: int = 1): if self.lora_layer is None: + self.weight_original = self.weight_original.to(device=self.weight.data.device, dtype=self.weight.data.dtype) print(f"Check if the original weights differ: {torch.allclose(self.weight_original, self.weight.data)}") return super().forward(hidden_states) else: From 788b610f3a8f442c40796f4ee2d6e95cb32436af Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 13:48:22 +0530 Subject: [PATCH 42/56] debugging --- src/diffusers/models/lora.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index c1de9714cfc0..3c5fd96f42a2 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -158,10 +158,10 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) - self.weight_original = self.weight.data.clone() self.lora_layer = lora_layer def set_lora_layer(self, lora_layer: Optional[LoRALinearLayer]): + self.weight_original = self.weight.data.clone() self.lora_layer = lora_layer def _fuse_lora(self): From aedcb706b7b38b444da8dff9862b9edcbb585ece Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 13:49:45 +0530 Subject: [PATCH 43/56] debugging --- src/diffusers/models/lora.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 3c5fd96f42a2..c73fbfe61809 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -159,6 +159,7 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) self.lora_layer = lora_layer + self.weight_original = None def set_lora_layer(self, lora_layer: Optional[LoRALinearLayer]): self.weight_original = self.weight.data.clone() From 67b8aa6aac56f3b748b853420d8a43a0528c837e Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 13:52:27 +0530 Subject: [PATCH 44/56] debugging --- src/diffusers/models/lora.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index c73fbfe61809..2269a7b87a0c 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -159,10 +159,8 @@ class LoRACompatibleLinear(nn.Linear): def __init__(self, *args, lora_layer: Optional[LoRALinearLayer] = None, **kwargs): super().__init__(*args, **kwargs) self.lora_layer = lora_layer - self.weight_original = None def set_lora_layer(self, lora_layer: Optional[LoRALinearLayer]): - self.weight_original = self.weight.data.clone() self.lora_layer = lora_layer def _fuse_lora(self): @@ -208,8 +206,6 @@ def _unfuse_lora(self): def forward(self, hidden_states, lora_scale: int = 1): if self.lora_layer is 
None: - self.weight_original = self.weight_original.to(device=self.weight.data.device, dtype=self.weight.data.dtype) - print(f"Check if the original weights differ: {torch.allclose(self.weight_original, self.weight.data)}") return super().forward(hidden_states) else: return super().forward(hidden_states) + lora_scale * self.lora_layer(hidden_states) From 1c69333c1c146a0680d2b136a764ae51db1262a5 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 14:35:47 +0530 Subject: [PATCH 45/56] suit up the generator initialization a bit. --- tests/models/test_lora_layers.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index 14fcf4a2aace..765fdb5f9344 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -947,7 +947,7 @@ def test_sdxl_1_0_lora(self): self.assertTrue(np.allclose(images, expected, atol=1e-4)) def test_sdxl_1_0_lora_fusion(self): - generator = torch.Generator().manual_seed(0) + generator = torch.manual_seed(0) pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") lora_model_id = "hf-internal-testing/sdxl-1.0-lora" @@ -967,7 +967,7 @@ def test_sdxl_1_0_lora_fusion(self): self.assertTrue(np.allclose(images, expected, atol=1e-4)) def test_sdxl_1_0_lora_unfusion(self): - generator = torch.Generator().manual_seed(0) + generator = torch.manual_seed(0) pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") lora_model_id = "hf-internal-testing/sdxl-1.0-lora" @@ -982,6 +982,7 @@ def test_sdxl_1_0_lora_unfusion(self): images_with_fusion = images[0, -3:, -3:, -1].flatten() pipe.unet.unfuse_lora() + generator = torch.manual_seed(0) images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 ).images @@ -993,7 +994,7 @@ def test_sdxl_1_0_lora_unfusion_effectivity(self): pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.enable_model_cpu_offload() - generator = torch.Generator().manual_seed(0) + generator = torch.manual_seed(0) images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 ).images @@ -1004,13 +1005,13 @@ def test_sdxl_1_0_lora_unfusion_effectivity(self): pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) pipe.unet.fuse_lora() - generator = torch.Generator().manual_seed(0) + generator = torch.manual_seed(0) _ = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 ).images pipe.unet.unfuse_lora() - generator = torch.Generator().manual_seed(0) + generator = torch.manual_seed(0) images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 ).images From 55f59585067872f2afbbf1c567bc930234d99200 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 14:56:29 +0530 Subject: [PATCH 46/56] remove print --- src/diffusers/loaders.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 14ca0688f151..a2f0f113bed1 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1721,7 +1721,6 @@ def unload_lora_weights(self): """ for _, module in self.unet.named_modules(): if hasattr(module, "set_lora_layer"): - print(f"Called from unload_lora_weights(): {module.lora_layer}") module.set_lora_layer(None) # Safe to call the following regardless of LoRA. 
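The arithmetic that `_fuse_lora` / `_unfuse_lora` keep iterating on in these commits boils down to adding the alpha-scaled product of the LoRA up and down matrices to the base weight, and subtracting the same product again to unfuse. A minimal sketch with standalone tensors rather than the library classes (illustration only; shapes and scale are chosen arbitrarily):

    import torch
    from torch import nn

    torch.manual_seed(0)
    in_features, out_features, rank, alpha = 32, 64, 4, 8.0

    linear = nn.Linear(in_features, out_features)
    w_orig = linear.weight.data.clone()
    w_down = torch.randn(rank, in_features) * 0.1    # LoRA "down" projection
    w_up = torch.randn(out_features, rank) * 0.1     # LoRA "up" projection

    # Fuse: W' = W + (alpha / rank) * up @ down. The bmm over [None, :] views used
    # in the patches is just a batched spelling of this same matmul.
    delta = torch.bmm((w_up * (alpha / rank))[None, :], w_down[None, :])[0]
    linear.weight.data = w_orig + delta

    # Unfuse: subtract the same product to recover the original weight.
    linear.weight.data = linear.weight.data - delta
    print(torch.allclose(linear.weight.data, w_orig, atol=1e-6))  # True, up to float error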
From 73bdcb14cbc9131d4ec5203886566480cf47c238 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 15:47:57 +0530 Subject: [PATCH 47/56] update assertion. --- tests/models/test_lora_layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index 765fdb5f9344..e72da410c83d 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -942,7 +942,7 @@ def test_sdxl_1_0_lora(self): ).images images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) + expected = np.array([0.4239, 0.3993, 0.341, 0.2577, 0.2681, 0.3015, 0.3214, 0.3868, 0.3386]) self.assertTrue(np.allclose(images, expected, atol=1e-4)) @@ -962,7 +962,7 @@ def test_sdxl_1_0_lora_fusion(self): images = images[0, -3:, -3:, -1].flatten() # This way we also test equivalence between LoRA fusion and the non-fusion behaviour. - expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) + expected = np.array([0.4239, 0.3993, 0.341, 0.2577, 0.2681, 0.3015, 0.3214, 0.3868, 0.3386]) self.assertTrue(np.allclose(images, expected, atol=1e-4)) From 940ed1b6aa80313941fb1e173f6d02ad4d391fbf Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 17:21:15 +0530 Subject: [PATCH 48/56] debugging --- src/diffusers/loaders.py | 3 +- tests/models/test_lora_layers.py | 69 ++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index a2f0f113bed1..611f089ab34e 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -447,7 +447,7 @@ def format_to_lora_compatible(key): if network_alphas is not None: network_alphas = {format_to_lora_compatible(k): v for k, v in network_alphas.items()} return state_dict, network_alphas - + def save_attn_procs( self, save_directory: Union[str, os.PathLike], @@ -530,6 +530,7 @@ def fuse_lora(self): def _fuse_lora_apply(self, module): if hasattr(module, "_fuse_lora"): + print("Anything in here?") module._fuse_lora() def unfuse_lora(self): diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index e72da410c83d..650d203c4280 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -693,6 +693,75 @@ def test_load_lora_locally_safetensors(self): sd_pipe.unload_lora_weights() + def test_lora_fusion(self): + pipeline_components, lora_components = self.get_dummy_components() + sd_pipe = StableDiffusionXLPipeline(**pipeline_components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) + + original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images + orig_image_slice = original_images[0, -3:, -3:, -1] + + with tempfile.TemporaryDirectory() as tmpdirname: + StableDiffusionXLPipeline.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=lora_components["unet_lora_layers"], + text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + safe_serialization=True, + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + sd_pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")) + + sd_pipe.unet.fuse_lora() + lora_images = 
sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images + lora_image_slice = lora_images[0, -3:, -3:, -1] + + self.assertFalse(np.allclose(orig_image_slice, lora_image_slice, atol=1e-3)) + + def test_unfuse_lora(self): + pipeline_components, lora_components = self.get_dummy_components() + sd_pipe = StableDiffusionXLPipeline(**pipeline_components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) + + original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images + orig_image_slice = original_images[0, -3:, -3:, -1] + + with tempfile.TemporaryDirectory() as tmpdirname: + StableDiffusionXLPipeline.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=lora_components["unet_lora_layers"], + text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + safe_serialization=True, + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + sd_pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")) + + sd_pipe.unet.fuse_lora() + lora_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images + lora_image_slice = lora_images[0, -3:, -3:, -1] + + # Unload LoRA parameters. + sd_pipe.unfuse_lora() + original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images + orig_image_slice_two = original_images[0, -3:, -3:, -1] + + assert not np.allclose( + orig_image_slice, lora_image_slice + ), "LoRA parameters should lead to a different image slice." + assert not np.allclose( + orig_image_slice_two, lora_image_slice + ), "LoRA parameters should lead to a different image slice." + assert np.allclose( + orig_image_slice, orig_image_slice_two, atol=1e-3 + ), "Unloading LoRA parameters should lead to results similar to what was obtained with the pipeline without any LoRA parameters." + @slow @require_torch_gpu From 8fcd42ac4c572006e152bdbcdea41c58463faf66 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 17:35:03 +0530 Subject: [PATCH 49/56] remove print. --- src/diffusers/loaders.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 611f089ab34e..ca0f7b98c0fe 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -530,7 +530,6 @@ def fuse_lora(self): def _fuse_lora_apply(self, module): if hasattr(module, "_fuse_lora"): - print("Anything in here?") module._fuse_lora() def unfuse_lora(self): From 6e995617fb7802939e4e8f191932af2062f32408 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 17:48:57 +0530 Subject: [PATCH 50/56] fix: assertions. --- tests/models/test_lora_layers.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index 650d203c4280..4234ec3bdf8d 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -704,6 +704,11 @@ def test_lora_fusion(self): original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] + # Emulate training. 
+ set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -732,6 +737,11 @@ def test_unfuse_lora(self): original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] + # Emulate training. + set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -748,7 +758,7 @@ def test_unfuse_lora(self): lora_image_slice = lora_images[0, -3:, -3:, -1] # Unload LoRA parameters. - sd_pipe.unfuse_lora() + sd_pipe.unet.unfuse_lora() original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice_two = original_images[0, -3:, -3:, -1] @@ -1011,7 +1021,7 @@ def test_sdxl_1_0_lora(self): ).images images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.4239, 0.3993, 0.341, 0.2577, 0.2681, 0.3015, 0.3214, 0.3868, 0.3386]) + expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) self.assertTrue(np.allclose(images, expected, atol=1e-4)) @@ -1031,7 +1041,7 @@ def test_sdxl_1_0_lora_fusion(self): images = images[0, -3:, -3:, -1].flatten() # This way we also test equivalence between LoRA fusion and the non-fusion behaviour. - expected = np.array([0.4239, 0.3993, 0.341, 0.2577, 0.2681, 0.3015, 0.3214, 0.3868, 0.3386]) + expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) self.assertTrue(np.allclose(images, expected, atol=1e-4)) From c3adb8ce12f0f8ccd41e4f5cc38eb944eeef13f7 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 17:59:49 +0530 Subject: [PATCH 51/56] style --- src/diffusers/loaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index ca0f7b98c0fe..a2f0f113bed1 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -447,7 +447,7 @@ def format_to_lora_compatible(key): if network_alphas is not None: network_alphas = {format_to_lora_compatible(k): v for k, v in network_alphas.items()} return state_dict, network_alphas - + def save_attn_procs( self, save_directory: Union[str, os.PathLike], From 2d6cd0302c397fd3c50ababa41d8914bc4b78d0e Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 18:01:19 +0530 Subject: [PATCH 52/56] can generator be a problem? 
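One way the two seeding styles can differ (a standalone illustration, not a claim that this is the root cause here): `torch.manual_seed(0)` seeds and returns the global default generator, whose state is shared with every other consumer of the global RNG, while `torch.Generator().manual_seed(0)` creates a fresh CPU generator with isolated state.

    import torch

    g_global = torch.manual_seed(0)              # seeds and returns the *default* generator
    g_local = torch.Generator().manual_seed(0)   # fresh CPU generator with isolated state

    _ = torch.randn(1)                           # any other global-RNG use advances g_global...
    print(torch.randn(1, generator=g_global))    # ...so this is the 2nd sample of the seeded sequence
    print(torch.randn(1, generator=g_local))     # the local generator still yields the 1st sample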
--- tests/models/test_lora_layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index 4234ec3bdf8d..dbe98d06775b 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -1026,7 +1026,7 @@ def test_sdxl_1_0_lora(self): self.assertTrue(np.allclose(images, expected, atol=1e-4)) def test_sdxl_1_0_lora_fusion(self): - generator = torch.manual_seed(0) + generator = torch.Generator().manual_seed(0) pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") lora_model_id = "hf-internal-testing/sdxl-1.0-lora" From 53f2e744a7dc754ce49a20558a142ff28103aa03 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 28 Aug 2023 18:10:06 +0530 Subject: [PATCH 53/56] generator --- tests/models/test_lora_layers.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index dbe98d06775b..82427ba9cfbc 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -1046,7 +1046,7 @@ def test_sdxl_1_0_lora_fusion(self): self.assertTrue(np.allclose(images, expected, atol=1e-4)) def test_sdxl_1_0_lora_unfusion(self): - generator = torch.manual_seed(0) + generator = torch.Generator().manual_seed(0) pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") lora_model_id = "hf-internal-testing/sdxl-1.0-lora" @@ -1061,19 +1061,19 @@ def test_sdxl_1_0_lora_unfusion(self): images_with_fusion = images[0, -3:, -3:, -1].flatten() pipe.unet.unfuse_lora() - generator = torch.manual_seed(0) + generator = torch.Generator().manual_seed(0) images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 ).images images_without_fusion = images[0, -3:, -3:, -1].flatten() - self.assertFalse(np.allclose(images_with_fusion, images_without_fusion)) + self.assertFalse(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3)) def test_sdxl_1_0_lora_unfusion_effectivity(self): pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.enable_model_cpu_offload() - generator = torch.manual_seed(0) + generator = torch.Generator().manual_seed(0) images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 ).images @@ -1084,13 +1084,13 @@ def test_sdxl_1_0_lora_unfusion_effectivity(self): pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) pipe.unet.fuse_lora() - generator = torch.manual_seed(0) + generator = torch.Generator().manual_seed(0) _ = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 ).images pipe.unet.unfuse_lora() - generator = torch.manual_seed(0) + generator = torch.Generator().manual_seed(0) images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 ).images @@ -1123,6 +1123,7 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): pipe.enable_model_cpu_offload() start_time = time.time() + generator = torch.Generator().manual_seed(0) for _ in range(3): pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 From b9ea1fc8ac006c450dd5779f771c67f82f019f79 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Tue, 29 Aug 2023 07:53:51 +0530 Subject: [PATCH 54/56] correct tests. 
--- src/diffusers/models/lora.py | 1 - tests/models/test_lora_layers.py | 10 +++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 2269a7b87a0c..cb0b3b45eb69 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -171,7 +171,6 @@ def _fuse_lora(self): logger.info(f"Fusing LoRA weights for {self.__class__}") w_orig = self.weight.data.float() - self.weight_original = w_orig.clone() w_up = self.lora_layer.up.weight.data.float() w_down = self.lora_layer.down.weight.data.float() diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index 82427ba9cfbc..abf8d5eedf60 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -713,8 +713,6 @@ def test_lora_fusion(self): StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -746,8 +744,6 @@ def test_unfuse_lora(self): StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -764,13 +760,13 @@ def test_unfuse_lora(self): assert not np.allclose( orig_image_slice, lora_image_slice - ), "LoRA parameters should lead to a different image slice." + ), "Fusion of LoRAs should lead to a different image slice." assert not np.allclose( orig_image_slice_two, lora_image_slice - ), "LoRA parameters should lead to a different image slice." + ), "Fusion of LoRAs should lead to a different image slice." assert np.allclose( orig_image_slice, orig_image_slice_two, atol=1e-3 - ), "Unloading LoRA parameters should lead to results similar to what was obtained with the pipeline without any LoRA parameters." + ), "Reversing the fusion of LoRAs should lead to results similar to what was obtained with the pipeline without any LoRA parameters." @slow From 9cb8ec310e981454c6dd8bbcc238ebda31aca8af Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Tue, 29 Aug 2023 08:21:28 +0530 Subject: [PATCH 55/56] support text encoder lora fusion. 
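With the text encoder projections wrapped as `PatchedLoraProjection`, fusion can now be driven from the pipeline rather than only from the UNet. Roughly, the intended user-facing flow looks like the following sketch (it mirrors the slow tests updated later in this series; it is not an excerpt from them):

    from diffusers import DiffusionPipeline

    pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
    pipe.load_lora_weights(
        "hf-internal-testing/sdxl-1.0-lora",
        weight_name="sd_xl_offset_example-lora_1.0.safetensors",
    )

    pipe.fuse_lora()    # folds LoRA into the UNet and both patched text encoders
    image = pipe("masterpiece, best quality, mountain", num_inference_steps=2).images[0]

    pipe.unfuse_lora()  # restores the original, pre-fusion weights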
--- src/diffusers/loaders.py | 119 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index a2f0f113bed1..966fdc616457 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -85,7 +85,49 @@ def __init__(self, regular_linear_layer, lora_scale=1, network_alpha=None, rank= self.lora_scale = lora_scale + def _fuse_lora(self): + if self.lora_linear_layer is None: + return + + dtype, device = self.regular_linear_layer.weight.data.dtype, self.regular_linear_layer.weight.data.device + logger.info(f"Fusing LoRA weights for {self.__class__}") + + w_orig = self.regular_linear_layer.weight.data.float() + w_up = self.lora_linear_layer.up.weight.data.float() + w_down = self.lora_linear_layer.down.weight.data.float() + + if self.lora_linear_layer.network_alpha is not None: + w_up = w_up * self.lora_linear_layer.network_alpha / self.lora_linear_layer.rank + + fused_weight = w_orig + torch.bmm(w_up[None, :], w_down[None, :])[0] + self.regular_linear_layer.weight.data = fused_weight.to(device=device, dtype=dtype) + + # we can drop the lora layer now + self.lora_linear_layer = None + + # offload the up and down matrices to CPU to not blow the memory + self.w_up = w_up.cpu() + self.w_down = w_down.cpu() + + def _unfuse_lora(self): + if not (hasattr(self, "w_up") and hasattr(self, "w_down")): + return + logger.info(f"Unfusing LoRA weights for {self.__class__}") + + fused_weight = self.regular_linear_layer.weight.data + dtype, device = fused_weight.dtype, fused_weight.device + + self.w_up = self.w_up.to(device=device, dtype=dtype) + self.w_down = self.w_down.to(device, dtype=dtype) + unfused_weight = fused_weight - torch.bmm(self.w_up[None, :], self.w_down[None, :])[0] + self.regular_linear_layer.weight.data = unfused_weight.to(device=device, dtype=dtype) + + self.w_up = None + self.w_down = None + def forward(self, input): + if self.lora_linear_layer is None: + return self.regular_linear_layer(input) return self.regular_linear_layer(input) + self.lora_scale * self.lora_linear_layer(input) @@ -1726,6 +1768,83 @@ def unload_lora_weights(self): # Safe to call the following regardless of LoRA. self._remove_text_encoder_monkey_patch() + def fuse_lora(self, fuse_unet: bool = True, fuse_text_encoder: bool = True): + r""" + Fuses the LoRA parameters into the original parameters of the corresponding blocks. + + + + This is an experimental API. + + + + Args: + fuse_unet (`bool`, defaults to `True`): Whether to fuse the UNet LoRA parameters. + fuse_text_encoder (`bool`, defaults to `True`): + Whether to fuse the text encoder LoRA parameters. If the text encoder wasn't monkey-patched with the + LoRA parameters then it won't have any effect. 
+ """ + if fuse_unet: + self.unet.fuse_lora() + + def fuse_text_encoder_lora(text_encoder): + for _, attn_module in text_encoder_attn_modules(text_encoder): + if isinstance(attn_module.q_proj, PatchedLoraProjection): + attn_module.q_proj._fuse_lora() + attn_module.k_proj._fuse_lora() + attn_module.v_proj._fuse_lora() + attn_module.out_proj._fuse_lora() + + for _, mlp_module in text_encoder_mlp_modules(text_encoder): + if isinstance(mlp_module.fc1, PatchedLoraProjection): + mlp_module.fc1._fuse_lora() + mlp_module.fc2._fuse_lora() + + if fuse_text_encoder: + if hasattr(self, "text_encoder"): + fuse_text_encoder_lora(self.text_encoder) + if hasattr(self, "text_encoder_2"): + fuse_text_encoder_lora(self.text_encoder_2) + + def unfuse_lora(self, unfuse_unet: bool = True, unfuse_text_encoder: bool = True): + r""" + Reverses the effect of + [`pipe.fuse_lora()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraLoaderMixin.fuse_lora). + + + + This is an experimental API. + + + + Args: + unfuse_unet (`bool`, defaults to `True`): Whether to unfuse the UNet LoRA parameters. + unfuse_text_encoder (`bool`, defaults to `True`): + Whether to unfuse the text encoder LoRA parameters. If the text encoder wasn't monkey-patched with the + LoRA parameters then it won't have any effect. + """ + if unfuse_unet: + self.unet.unfuse_lora() + + def unfuse_text_encoder_lora(text_encoder): + for _, attn_module in text_encoder_attn_modules(text_encoder): + if isinstance(attn_module.q_proj, PatchedLoraProjection): + attn_module.q_proj._unfuse_lora() + attn_module.k_proj._unfuse_lora() + attn_module.v_proj._unfuse_lora() + attn_module.out_proj._unfuse_lora() + + for _, mlp_module in text_encoder_mlp_modules(text_encoder): + if isinstance(mlp_module.fc1, PatchedLoraProjection): + mlp_module.fc1._unfuse_lora() + mlp_module.fc2._unfuse_lora() + + if unfuse_text_encoder: + if hasattr(self, "text_encoder"): + unfuse_text_encoder_lora(self.text_encoder) + if hasattr(self, "text_encoder_2"): + unfuse_text_encoder_lora(self.text_encoder_2) + class FromSingleFileMixin: """ From 50c611d1b4c0bb180306a85babe9c933ab5fdf99 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Tue, 29 Aug 2023 09:06:49 +0530 Subject: [PATCH 56/56] tighten up tests. 
--- tests/models/test_lora_layers.py | 65 ++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 11 deletions(-) diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index abf8d5eedf60..f17529a1680e 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -713,12 +713,14 @@ def test_lora_fusion(self): StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, unet_lora_layers=lora_components["unet_lora_layers"], + text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")) - sd_pipe.unet.fuse_lora() + sd_pipe.fuse_lora() lora_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images lora_image_slice = lora_images[0, -3:, -3:, -1] @@ -744,17 +746,19 @@ def test_unfuse_lora(self): StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, unet_lora_layers=lora_components["unet_lora_layers"], + text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")) - sd_pipe.unet.fuse_lora() + sd_pipe.fuse_lora() lora_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images lora_image_slice = lora_images[0, -3:, -3:, -1] - # Unload LoRA parameters. - sd_pipe.unet.unfuse_lora() + # Reverse LoRA fusion. + sd_pipe.unfuse_lora() original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice_two = original_images[0, -3:, -3:, -1] @@ -766,7 +770,46 @@ def test_unfuse_lora(self): ), "Fusion of LoRAs should lead to a different image slice." assert np.allclose( orig_image_slice, orig_image_slice_two, atol=1e-3 - ), "Reversing the fusion of LoRAs should lead to results similar to what was obtained with the pipeline without any LoRA parameters." + ), "Reversing LoRA fusion should lead to results similar to what was obtained with the pipeline without any LoRA parameters." + + def test_lora_fusion_is_not_affected_by_unloading(self): + pipeline_components, lora_components = self.get_dummy_components() + sd_pipe = StableDiffusionXLPipeline(**pipeline_components) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) + + _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images + + # Emulate training. 
+ set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + + with tempfile.TemporaryDirectory() as tmpdirname: + StableDiffusionXLPipeline.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=lora_components["unet_lora_layers"], + text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + safe_serialization=True, + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + sd_pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")) + + sd_pipe.fuse_lora() + lora_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images + lora_image_slice = lora_images[0, -3:, -3:, -1] + + # Unload LoRA parameters. + sd_pipe.unload_lora_weights() + images_with_unloaded_lora = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images + images_with_unloaded_lora_slice = images_with_unloaded_lora[0, -3:, -3:, -1] + + assert np.allclose( + lora_image_slice, images_with_unloaded_lora_slice + ), "`unload_lora_weights()` should have not effect on the semantics of the results as the LoRA parameters were fused." @slow @@ -1028,7 +1071,7 @@ def test_sdxl_1_0_lora_fusion(self): lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - pipe.unet.fuse_lora() + pipe.fuse_lora() pipe.enable_model_cpu_offload() images = pipe( @@ -1048,7 +1091,7 @@ def test_sdxl_1_0_lora_unfusion(self): lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - pipe.unet.fuse_lora() + pipe.fuse_lora() pipe.enable_model_cpu_offload() images = pipe( @@ -1056,7 +1099,7 @@ def test_sdxl_1_0_lora_unfusion(self): ).images images_with_fusion = images[0, -3:, -3:, -1].flatten() - pipe.unet.unfuse_lora() + pipe.unfuse_lora() generator = torch.Generator().manual_seed(0) images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 @@ -1078,14 +1121,14 @@ def test_sdxl_1_0_lora_unfusion_effectivity(self): lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - pipe.unet.fuse_lora() + pipe.fuse_lora() generator = torch.Generator().manual_seed(0) _ = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 ).images - pipe.unet.unfuse_lora() + pipe.unfuse_lora() generator = torch.Generator().manual_seed(0) images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 @@ -1115,7 +1158,7 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - pipe.unet.fuse_lora() + pipe.fuse_lora() pipe.enable_model_cpu_offload() start_time = time.time()