From 507d043766186009fedeafa1f61f6ac97f91cfb8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 08:52:49 +0530 Subject: [PATCH 01/44] start deprecating loraattn. --- tests/lora/test_lora_layers_old_backend.py | 857 +++++++-------------- 1 file changed, 299 insertions(+), 558 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 3d3b858fa0fd..2b99655c74c0 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -22,7 +22,6 @@ import numpy as np import torch import torch.nn as nn -import torch.nn.functional as F from huggingface_hub.repocard import RepoCard from PIL import Image from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer @@ -41,17 +40,14 @@ UNet2DConditionModel, UNet3DConditionModel, ) -from diffusers.loaders import AttnProcsLayers, LoraLoaderMixin +from diffusers.loaders import LoraLoaderMixin, StableDiffusionXLLoraLoaderMixin from diffusers.models.attention_processor import ( Attention, AttnProcessor, AttnProcessor2_0, - LoRAAttnProcessor, - LoRAAttnProcessor2_0, - LoRAXFormersAttnProcessor, XFormersAttnProcessor, ) -from diffusers.models.lora import PatchedLoraProjection, text_encoder_attn_modules +from diffusers.models.lora import LoRALinearLayer from diffusers.utils.import_utils import is_xformers_available from diffusers.utils.testing_utils import ( deprecate_after_peft_backend, @@ -64,109 +60,147 @@ ) -def create_lora_layers(model, mock_weights: bool = True): - lora_attn_procs = {} - for name in model.attn_processors.keys(): - cross_attention_dim = None if name.endswith("attn1.processor") else model.config.cross_attention_dim - if name.startswith("mid_block"): - hidden_size = model.config.block_out_channels[-1] - elif name.startswith("up_blocks"): - block_id = int(name[len("up_blocks.")]) - hidden_size = list(reversed(model.config.block_out_channels))[block_id] - elif name.startswith("down_blocks"): - block_id = int(name[len("down_blocks.")]) - hidden_size = model.config.block_out_channels[block_id] +# def create_lora_layers(model, mock_weights: bool = True): +# lora_attn_procs = {} +# for name in model.attn_processors.keys(): +# cross_attention_dim = None if name.endswith("attn1.processor") else model.config.cross_attention_dim +# if name.startswith("mid_block"): +# hidden_size = model.config.block_out_channels[-1] +# elif name.startswith("up_blocks"): +# block_id = int(name[len("up_blocks.")]) +# hidden_size = list(reversed(model.config.block_out_channels))[block_id] +# elif name.startswith("down_blocks"): +# block_id = int(name[len("down_blocks.")]) +# hidden_size = model.config.block_out_channels[block_id] - lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim) - lora_attn_procs[name] = lora_attn_procs[name].to(model.device) - - if mock_weights: - # add 1 to weights to mock trained weights - with torch.no_grad(): - lora_attn_procs[name].to_q_lora.up.weight += 1 - lora_attn_procs[name].to_k_lora.up.weight += 1 - lora_attn_procs[name].to_v_lora.up.weight += 1 - lora_attn_procs[name].to_out_lora.up.weight += 1 - - return lora_attn_procs - - -def create_unet_lora_layers(unet: nn.Module): - lora_attn_procs = {} - for name in unet.attn_processors.keys(): - cross_attention_dim = None if name.endswith("attn1.processor") else unet.config.cross_attention_dim - if name.startswith("mid_block"): - hidden_size = unet.config.block_out_channels[-1] - elif 
name.startswith("up_blocks"): - block_id = int(name[len("up_blocks.")]) - hidden_size = list(reversed(unet.config.block_out_channels))[block_id] - elif name.startswith("down_blocks"): - block_id = int(name[len("down_blocks.")]) - hidden_size = unet.config.block_out_channels[block_id] - lora_attn_processor_class = ( - LoRAAttnProcessor2_0 if hasattr(F, "scaled_dot_product_attention") else LoRAAttnProcessor - ) - lora_attn_procs[name] = lora_attn_processor_class( - hidden_size=hidden_size, cross_attention_dim=cross_attention_dim - ) - unet_lora_layers = AttnProcsLayers(lora_attn_procs) - return lora_attn_procs, unet_lora_layers +# lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim) +# lora_attn_procs[name] = lora_attn_procs[name].to(model.device) +# if mock_weights: +# # add 1 to weights to mock trained weights +# with torch.no_grad(): +# lora_attn_procs[name].to_q_lora.up.weight += 1 +# lora_attn_procs[name].to_k_lora.up.weight += 1 +# lora_attn_procs[name].to_v_lora.up.weight += 1 +# lora_attn_procs[name].to_out_lora.up.weight += 1 -def create_text_encoder_lora_attn_procs(text_encoder: nn.Module): - text_lora_attn_procs = {} - lora_attn_processor_class = ( - LoRAAttnProcessor2_0 if hasattr(F, "scaled_dot_product_attention") else LoRAAttnProcessor - ) - for name, module in text_encoder_attn_modules(text_encoder): - if isinstance(module.out_proj, nn.Linear): - out_features = module.out_proj.out_features - elif isinstance(module.out_proj, PatchedLoraProjection): - out_features = module.out_proj.regular_linear_layer.out_features - else: - assert False, module.out_proj.__class__ - - text_lora_attn_procs[name] = lora_attn_processor_class(hidden_size=out_features, cross_attention_dim=None) - return text_lora_attn_procs +# return lora_attn_procs -def create_text_encoder_lora_layers(text_encoder: nn.Module): - text_lora_attn_procs = create_text_encoder_lora_attn_procs(text_encoder) - text_encoder_lora_layers = AttnProcsLayers(text_lora_attn_procs) - return text_encoder_lora_layers +def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=True): + unet_lora_parameters = [] + in_features = None - -def create_lora_3d_layers(model, mock_weights: bool = True): - lora_attn_procs = {} - for name in model.attn_processors.keys(): - has_cross_attention = name.endswith("attn2.processor") and not ( - name.startswith("transformer_in") or "temp_attentions" in name.split(".") - ) - cross_attention_dim = model.config.cross_attention_dim if has_cross_attention else None - if name.startswith("mid_block"): - hidden_size = model.config.block_out_channels[-1] - elif name.startswith("up_blocks"): - block_id = int(name[len("up_blocks.")]) - hidden_size = list(reversed(model.config.block_out_channels))[block_id] - elif name.startswith("down_blocks"): - block_id = int(name[len("down_blocks.")]) - hidden_size = model.config.block_out_channels[block_id] - elif name.startswith("transformer_in"): + for attn_processor_name, attn_processor in unet.attn_processors.items(): + if is_3d and attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 - hidden_size = 8 * model.config.attention_head_dim - - lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim) - lora_attn_procs[name] = lora_attn_procs[name].to(model.device) + in_features = 8 * 
unet.config.attention_head_dim + + # Parse the attention module. + attn_module = unet + for n in attn_processor_name.split(".")[:-1]: + attn_module = getattr(attn_module, n) + + # Set the `lora_layer` attribute of the attention-related matrices. + attn_module.to_q.set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_q.in_features if in_features is None else in_features, + out_features=attn_module.to_q.out_features, + rank=rank, + ) + ) + attn_module.to_k.set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_k.in_features if in_features is None else in_features, + out_features=attn_module.to_k.out_features, + rank=rank, + ) + ) + attn_module.to_v.set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_v.in_features if in_features is None else in_features, + out_features=attn_module.to_v.out_features, + rank=rank, + ) + ) + attn_module.to_out[0].set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_out[0].in_features if in_features is None else in_features, + out_features=attn_module.to_out[0].out_features, + rank=rank, + ) + ) if mock_weights: - # add 1 to weights to mock trained weights with torch.no_grad(): - lora_attn_procs[name].to_q_lora.up.weight += 1 - lora_attn_procs[name].to_k_lora.up.weight += 1 - lora_attn_procs[name].to_v_lora.up.weight += 1 - lora_attn_procs[name].to_out_lora.up.weight += 1 - - return lora_attn_procs + attn_module.to_q.lora_layer.up.weight += 1 + attn_module.to_k.lora_layer.up.weight += 1 + attn_module.to_v.lora_layer.up.weight += 1 + attn_module.to_out[0].lora_layer.up.weight += 1 + + # Accumulate the LoRA params to optimize. + unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) + unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) + unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) + unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) + + return unet_lora_parameters + + +# def create_text_encoder_lora_attn_procs(text_encoder: nn.Module): +# text_lora_attn_procs = {} +# lora_attn_processor_class = ( +# LoRAAttnProcessor2_0 if hasattr(F, "scaled_dot_product_attention") else LoRAAttnProcessor +# ) +# for name, module in text_encoder_attn_modules(text_encoder): +# if isinstance(module.out_proj, nn.Linear): +# out_features = module.out_proj.out_features +# elif isinstance(module.out_proj, PatchedLoraProjection): +# out_features = module.out_proj.regular_linear_layer.out_features +# else: +# assert False, module.out_proj.__class__ + +# text_lora_attn_procs[name] = lora_attn_processor_class(hidden_size=out_features, cross_attention_dim=None) +# return text_lora_attn_procs + + +# def create_text_encoder_lora_layers(text_encoder: nn.Module): +# text_lora_attn_procs = create_text_encoder_lora_attn_procs(text_encoder) +# text_encoder_lora_layers = AttnProcsLayers(text_lora_attn_procs) +# return text_encoder_lora_layers + + +# def create_lora_3d_layers(model, mock_weights: bool = True): +# lora_attn_procs = {} +# for name in model.attn_processors.keys(): +# has_cross_attention = name.endswith("attn2.processor") and not ( +# name.startswith("transformer_in") or "temp_attentions" in name.split(".") +# ) +# cross_attention_dim = model.config.cross_attention_dim if has_cross_attention else None +# if name.startswith("mid_block"): +# hidden_size = model.config.block_out_channels[-1] +# elif name.startswith("up_blocks"): +# block_id = int(name[len("up_blocks.")]) +# hidden_size = list(reversed(model.config.block_out_channels))[block_id] +# elif 
name.startswith("down_blocks"): +# block_id = int(name[len("down_blocks.")]) +# hidden_size = model.config.block_out_channels[block_id] +# elif name.startswith("transformer_in"): +# # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 +# hidden_size = 8 * model.config.attention_head_dim + +# lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim) +# lora_attn_procs[name] = lora_attn_procs[name].to(model.device) + +# if mock_weights: +# # add 1 to weights to mock trained weights +# with torch.no_grad(): +# lora_attn_procs[name].to_q_lora.up.weight += 1 +# lora_attn_procs[name].to_k_lora.up.weight += 1 +# lora_attn_procs[name].to_v_lora.up.weight += 1 +# lora_attn_procs[name].to_out_lora.up.weight += 1 + +# return lora_attn_procs def set_lora_weights(lora_attn_parameters, randn_weight=False, var=1.0): @@ -192,6 +226,8 @@ def state_dicts_almost_equal(sd1, sd2): @deprecate_after_peft_backend class LoraLoaderMixinTests(unittest.TestCase): + lora_rank = 4 + def get_dummy_components(self): torch.manual_seed(0) unet = UNet2DConditionModel( @@ -235,8 +271,10 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(text_encoder_config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - unet_lora_attn_procs, unet_lora_layers = create_unet_lora_layers(unet) - text_encoder_lora_layers = create_text_encoder_lora_layers(text_encoder) + unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) + text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( + text_encoder, dtype=torch.float32, rank=self.lora_rank + ) pipeline_components = { "unet": unet, @@ -248,11 +286,7 @@ def get_dummy_components(self): "feature_extractor": None, "image_encoder": None, } - lora_components = { - "unet_lora_layers": unet_lora_layers, - "text_encoder_lora_layers": text_encoder_lora_layers, - "unet_lora_attn_procs": unet_lora_attn_procs, - } + lora_components = {"unet_lora_params": unet_lora_params, "text_encoder_lora_params": text_encoder_lora_params} return pipeline_components, lora_components def get_dummy_inputs(self, with_generator=True): @@ -290,8 +324,8 @@ def create_lora_weight_file(self, tmpdirname): _, lora_components = self.get_dummy_components() LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -311,68 +345,8 @@ def test_stable_diffusion_xformers_attn_processors(self): image = sd_pipe(**inputs).images assert image.shape == (1, 64, 64, 3) - @unittest.skipIf(not torch.cuda.is_available(), reason="xformers requires cuda") - def test_stable_diffusion_attn_processors(self): - # disable_full_determinism() - device = "cuda" # ensure determinism for the device-dependent torch.Generator - components, _ = self.get_dummy_components() - sd_pipe = StableDiffusionPipeline(**components) - sd_pipe = sd_pipe.to(device) - sd_pipe.set_progress_bar_config(disable=None) - - _, _, inputs = self.get_dummy_inputs() - - # run normal sd pipe - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - # run attention slicing - 
sd_pipe.enable_attention_slicing() - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - # run vae attention slicing - sd_pipe.enable_vae_slicing() - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - # run lora attention - attn_processors, _ = create_unet_lora_layers(sd_pipe.unet) - attn_processors = {k: v.to("cuda") for k, v in attn_processors.items()} - sd_pipe.unet.set_attn_processor(attn_processors) - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - @unittest.skipIf(not torch.cuda.is_available() or not is_xformers_available(), reason="xformers requires cuda") - def test_stable_diffusion_set_xformers_attn_processors(self): - # disable_full_determinism() - device = "cuda" # ensure determinism for the device-dependent torch.Generator - components, _ = self.get_dummy_components() - sd_pipe = StableDiffusionPipeline(**components) - sd_pipe = sd_pipe.to(device) - sd_pipe.set_progress_bar_config(disable=None) - - _, _, inputs = self.get_dummy_inputs() - - # run normal sd pipe - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - # run lora xformers attention - attn_processors, _ = create_unet_lora_layers(sd_pipe.unet) - attn_processors = { - k: LoRAXFormersAttnProcessor(hidden_size=v.hidden_size, cross_attention_dim=v.cross_attention_dim) - for k, v in attn_processors.items() - } - attn_processors = {k: v.to("cuda") for k, v in attn_processors.items()} - sd_pipe.unet.set_attn_processor(attn_processors) - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - # enable_full_determinism() - def test_stable_diffusion_lora(self): - components, _ = self.get_dummy_components() + components, lora_components = self.get_dummy_components() sd_pipe = StableDiffusionPipeline(**components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) @@ -385,9 +359,14 @@ def test_stable_diffusion_lora(self): image_slice = image[0, -3:, -3:, -1] # set lora layers - lora_attn_procs = create_lora_layers(sd_pipe.unet) - sd_pipe.unet.set_attn_processor(lora_attn_procs) - sd_pipe = sd_pipe.to(torch_device) + with tempfile.TemporaryDirectory() as tmpdirname: + LoraLoaderMixin.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + sd_pipe.load_lora_weights(tmpdirname) # forward 2 _, _, inputs = self.get_dummy_inputs() @@ -420,8 +399,8 @@ def test_lora_save_load(self): with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(tmpdirname) @@ -434,7 +413,6 @@ def test_lora_save_load(self): def test_lora_save_load_no_safe_serialization(self): pipeline_components, lora_components = self.get_dummy_components() - unet_lora_attn_procs = lora_components["unet_lora_attn_procs"] sd_pipe = StableDiffusionPipeline(**pipeline_components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) @@ -445,9 +423,13 @@ def 
test_lora_save_load_no_safe_serialization(self): orig_image_slice = original_images[0, -3:, -3:, -1] with tempfile.TemporaryDirectory() as tmpdirname: - unet = sd_pipe.unet - unet.set_attn_processor(unet_lora_attn_procs) - unet.save_attn_procs(tmpdirname, safe_serialization=False) + LoraLoaderMixin.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + safe_serialization=False, + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) sd_pipe.load_lora_weights(tmpdirname) @@ -469,8 +451,15 @@ def test_text_encoder_lora_monkey_patch(self): # monkey patch params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) - set_lora_weights(params, randn_weight=False) + with tempfile.TemporaryDirectory() as tmpdirname: + LoraLoaderMixin.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=None, + text_encoder_lora_layers=params, + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + pipe.load_lora_weights(tmpdirname) # inference with lora outputs_with_lora = pipe.text_encoder(**dummy_tokens)[0] @@ -480,13 +469,20 @@ def test_text_encoder_lora_monkey_patch(self): outputs_without_lora, outputs_with_lora ), "lora_up_weight are all zero, so the lora outputs should be the same to without lora outputs" - # create lora_attn_procs with randn up.weights - create_text_encoder_lora_attn_procs(pipe.text_encoder) - # monkey patch - params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) + pipeline_components, _ = self.get_dummy_components() + pipe = StableDiffusionPipeline(**pipeline_components) + params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) set_lora_weights(params, randn_weight=True) + with tempfile.TemporaryDirectory() as tmpdirname: + LoraLoaderMixin.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=None, + text_encoder_lora_layers=params, + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + pipe.load_lora_weights(tmpdirname) # inference with lora outputs_with_lora = pipe.text_encoder(**dummy_tokens)[0] @@ -508,8 +504,15 @@ def test_text_encoder_lora_remove_monkey_patch(self): # monkey patch params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) - set_lora_weights(params, randn_weight=True) + with tempfile.TemporaryDirectory() as tmpdirname: + LoraLoaderMixin.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=None, + text_encoder_lora_layers=params, + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + pipe.load_lora_weights(tmpdirname) # inference with lora outputs_with_lora = pipe.text_encoder(**dummy_tokens)[0] @@ -541,8 +544,8 @@ def test_text_encoder_lora_scale(self): with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(tmpdirname) @@ -592,13 +595,13 @@ def test_unload_lora_sd(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) set_lora_weights(lora_components["text_encoder_lora_layers"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], text_encoder_lora_layers=lora_components["text_encoder_lora_layers"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -677,7 +680,7 @@ def test_lora_save_load_with_xformers(self): with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], text_encoder_lora_layers=lora_components["text_encoder_lora_layers"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -692,6 +695,8 @@ def test_lora_save_load_with_xformers(self): @deprecate_after_peft_backend class SDXInpaintLoraMixinTests(unittest.TestCase): + lora_rank = 4 + def get_dummy_inputs(self, device, seed=0, img_res=64, output_pil=True): # TODO: use tensor inputs instead of PIL, this is here just to leave the old expected_slices untouched if output_pil: @@ -765,6 +770,11 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(text_encoder_config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) + text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( + text_encoder, dtype=torch.float32, rank=self.lora_rank + ) + components = { "unet": unet, "scheduler": scheduler, @@ -775,12 +785,13 @@ def get_dummy_components(self): "feature_extractor": None, "image_encoder": None, } - return components + lora_components = {"unet_lora_params": unet_lora_params, "text_encoder_lora_params": text_encoder_lora_params} + return components, lora_components def test_stable_diffusion_inpaint_lora(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator - components = self.get_dummy_components() + components, lora_components = self.get_dummy_components() sd_pipe = StableDiffusionInpaintPipeline(**components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) @@ -792,9 +803,14 @@ def test_stable_diffusion_inpaint_lora(self): image_slice = image[0, -3:, -3:, -1] # set lora layers - lora_attn_procs = create_lora_layers(sd_pipe.unet) - sd_pipe.unet.set_attn_processor(lora_attn_procs) - sd_pipe = sd_pipe.to(torch_device) + with tempfile.TemporaryDirectory() as tmpdirname: + LoraLoaderMixin.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + sd_pipe.load_lora_weights(tmpdirname) # forward 2 inputs = self.get_dummy_inputs(device) @@ -814,6 +830,8 @@ def test_stable_diffusion_inpaint_lora(self): @deprecate_after_peft_backend class SDXLLoraLoaderMixinTests(unittest.TestCase): + lora_rank = 4 + def get_dummy_components(self): torch.manual_seed(0) unet = UNet2DConditionModel( @@ -871,9 +889,13 @@ def get_dummy_components(self): 
text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config) tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - unet_lora_attn_procs, unet_lora_layers = create_unet_lora_layers(unet) - text_encoder_one_lora_layers = create_text_encoder_lora_layers(text_encoder) - text_encoder_two_lora_layers = create_text_encoder_lora_layers(text_encoder_2) + unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) + text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( + text_encoder, dtype=torch.float32, rank=self.lora_rank + ) + text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin( + text_encoder_2, dtype=torch.float32, rank=self.lora_rank + ) pipeline_components = { "unet": unet, @@ -887,10 +909,9 @@ def get_dummy_components(self): "feature_extractor": None, } lora_components = { - "unet_lora_layers": unet_lora_layers, - "text_encoder_one_lora_layers": text_encoder_one_lora_layers, - "text_encoder_two_lora_layers": text_encoder_two_lora_layers, - "unet_lora_attn_procs": unet_lora_attn_procs, + "unet_lora_params": unet_lora_params, + "text_encoder_lora_params": text_encoder_lora_params, + "text_encoder_two_lora_params": text_encoder_two_lora_params, } return pipeline_components, lora_components @@ -929,9 +950,9 @@ def test_lora_save_load(self): with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(tmpdirname) @@ -951,16 +972,16 @@ def test_unload_lora_sdxl(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(tmpdirname) @@ -992,9 +1013,9 @@ def test_load_lora_locally(self): with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=False, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) @@ -1015,9 +1036,9 @@ def test_text_encoder_lora_state_dict_unchanged(self): with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=False, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) @@ -1050,9 +1071,9 @@ def test_load_lora_locally_safetensors(self): with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1069,16 +1090,16 @@ def test_lora_fuse_nan(self): _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1113,16 +1134,16 @@ def test_lora_fusion(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. - set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1146,16 +1167,16 @@ def test_unfuse_lora(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1191,16 +1212,16 @@ def test_lora_fusion_is_not_affected_by_unloading(self): _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images # Emulate training. - set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1230,16 +1251,16 @@ def test_fuse_lora_with_different_scales(self): _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1255,9 +1276,9 @@ def test_fuse_lora_with_different_scales(self): with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1282,16 +1303,16 @@ def test_with_different_scales(self): original_imagee_slice = original_images[0, -3:, -3:, -1] # Emulate training. - set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1330,16 +1351,16 @@ def test_with_different_scales_fusion_equivalence(self): images_slice = images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True, var=0.1) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True, var=0.1) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True, var=0.1) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True, var=0.1) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1377,16 +1398,16 @@ def test_save_load_fused_lora_modules(self): _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) # Emulate training. - set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True, var=0.1) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True, var=0.1) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True, var=0.1) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True, var=0.1) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1460,10 +1481,10 @@ def test_lora_processors(self): with torch.no_grad(): sample1 = model(**inputs_dict).sample - lora_attn_procs = create_lora_layers(model) + lora_params = create_unet_lora_layers(model) # make sure we can set a list of attention processors - model.set_attn_processor(lora_attn_procs) + model.load_attn_procs(lora_params) model.to(torch_device) # test that attn processors can be set to itself @@ -1480,120 +1501,6 @@ def test_lora_processors(self): # sample 2 and sample 3 should be different assert (sample2 - sample3).abs().max() > 1e-4 - def test_lora_save_load(self): - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = (8, 16) - - torch.manual_seed(0) - model = 
self.model_class(**init_dict) - model.to(torch_device) - - with torch.no_grad(): - old_sample = model(**inputs_dict).sample - - lora_attn_procs = create_lora_layers(model) - model.set_attn_processor(lora_attn_procs) - - with torch.no_grad(): - sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=False) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - new_model.load_attn_procs(tmpdirname) - - with torch.no_grad(): - new_sample = new_model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - assert (sample - new_sample).abs().max() < 5e-4 - - # LoRA and no LoRA should NOT be the same - assert (sample - old_sample).abs().max() > 5e-4 - - def test_lora_save_load_safetensors(self): - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = (8, 16) - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - with torch.no_grad(): - old_sample = model(**inputs_dict).sample - - lora_attn_procs = create_lora_layers(model) - model.set_attn_processor(lora_attn_procs) - - with torch.no_grad(): - sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=True) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - new_model.load_attn_procs(tmpdirname) - - with torch.no_grad(): - new_sample = new_model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - assert (sample - new_sample).abs().max() < 1e-4 - - # LoRA and no LoRA should NOT be the same - assert (sample - old_sample).abs().max() > 1e-4 - - def test_lora_save_safetensors_load_torch(self): - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = (8, 16) - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - lora_attn_procs = create_lora_layers(model, mock_weights=False) - model.set_attn_processor(lora_attn_procs) - # Saving as torch, properly reloads with directly filename - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=True) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - new_model.load_attn_procs(tmpdirname, weight_name="pytorch_lora_weights.safetensors") - - def test_lora_save_torch_force_load_safetensors_error(self): - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = (8, 16) - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - lora_attn_procs = create_lora_layers(model, mock_weights=False) - model.set_attn_processor(lora_attn_procs) - # Saving as torch, properly reloads with directly filename - with tempfile.TemporaryDirectory() as 
tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=False) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - with self.assertRaises(IOError) as e: - new_model.load_attn_procs(tmpdirname, use_safetensors=True) - self.assertIn("Error no file named pytorch_lora_weights.safetensors", str(e.exception)) - def test_lora_on_off(self, expected_max_diff=1e-3): # enable deterministic behavior for gradient checkpointing init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() @@ -1607,8 +1514,8 @@ def test_lora_on_off(self, expected_max_diff=1e-3): with torch.no_grad(): old_sample = model(**inputs_dict).sample - lora_attn_procs = create_lora_layers(model) - model.set_attn_processor(lora_attn_procs) + lora_params = create_unet_lora_layers(model) + model.load_attn_procs(lora_params) with torch.no_grad(): sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.0}).sample @@ -1637,8 +1544,8 @@ def test_lora_xformers_on_off(self, expected_max_diff=6e-4): torch.manual_seed(0) model = self.model_class(**init_dict) model.to(torch_device) - lora_attn_procs = create_lora_layers(model) - model.set_attn_processor(lora_attn_procs) + lora_params = create_unet_lora_layers(model) + model.load_attn_procs(lora_params) # default with torch.no_grad(): @@ -1712,10 +1619,10 @@ def test_lora_processors(self): with torch.no_grad(): sample1 = model(**inputs_dict).sample - lora_attn_procs = create_lora_3d_layers(model) + unet_lora_params = create_unet_lora_layers(model, is_3d=True) # make sure we can set a list of attention processors - model.set_attn_processor(lora_attn_procs) + model.load_attn_procs(unet_lora_params) model.to(torch_device) # test that attn processors can be set to itself @@ -1732,172 +1639,6 @@ def test_lora_processors(self): # sample 2 and sample 3 should be different assert (sample2 - sample3).abs().max() > 3e-3 - def test_lora_save_load(self): - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = 8 - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - with torch.no_grad(): - old_sample = model(**inputs_dict).sample - - lora_attn_procs = create_lora_3d_layers(model) - model.set_attn_processor(lora_attn_procs) - - with torch.no_grad(): - sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=False) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - new_model.load_attn_procs(tmpdirname) - - with torch.no_grad(): - new_sample = new_model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - assert (sample - new_sample).abs().max() < 5e-3 - - # LoRA and no LoRA should NOT be the same - assert (sample - old_sample).abs().max() > 1e-4 - - def test_lora_save_load_safetensors(self): - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = 8 - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - with torch.no_grad(): - old_sample = model(**inputs_dict).sample - - lora_attn_procs = create_lora_3d_layers(model) - model.set_attn_processor(lora_attn_procs) - - with torch.no_grad(): - 
sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=True) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - new_model.load_attn_procs(tmpdirname) - - with torch.no_grad(): - new_sample = new_model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - assert (sample - new_sample).abs().max() < 3e-3 - - # LoRA and no LoRA should NOT be the same - assert (sample - old_sample).abs().max() > 1e-4 - - def test_lora_save_safetensors_load_torch(self): - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = 8 - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - lora_attn_procs = create_lora_3d_layers(model, mock_weights=False) - model.set_attn_processor(lora_attn_procs) - # Saving as torch, properly reloads with directly filename - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - new_model.load_attn_procs(tmpdirname, weight_name="pytorch_lora_weights.safetensors") - - def test_lora_save_torch_force_load_safetensors_error(self): - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = 8 - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - lora_attn_procs = create_lora_3d_layers(model, mock_weights=False) - model.set_attn_processor(lora_attn_procs) - # Saving as torch, properly reloads with directly filename - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=False) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - with self.assertRaises(IOError) as e: - new_model.load_attn_procs(tmpdirname, use_safetensors=True) - self.assertIn("Error no file named pytorch_lora_weights.safetensors", str(e.exception)) - - def test_lora_on_off(self): - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = 8 - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - with torch.no_grad(): - old_sample = model(**inputs_dict).sample - - lora_attn_procs = create_lora_3d_layers(model) - model.set_attn_processor(lora_attn_procs) - - with torch.no_grad(): - sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.0}).sample - - model.set_default_attn_processor() - - with torch.no_grad(): - new_sample = model(**inputs_dict).sample - - assert (sample - new_sample).abs().max() < 1e-4 - assert (sample - old_sample).abs().max() < 3e-3 - - @unittest.skipIf( - torch_device != "cuda" or not is_xformers_available(), - reason="XFormers attention is only available with CUDA and `xformers` installed", - ) - def test_lora_xformers_on_off(self): - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - 
init_dict["attention_head_dim"] = 4 - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - lora_attn_procs = create_lora_3d_layers(model) - model.set_attn_processor(lora_attn_procs) - - # default - with torch.no_grad(): - sample = model(**inputs_dict).sample - - model.enable_xformers_memory_efficient_attention() - on_sample = model(**inputs_dict).sample - - model.disable_xformers_memory_efficient_attention() - off_sample = model(**inputs_dict).sample - - assert (sample - on_sample).abs().max() < 1e-4 - assert (sample - off_sample).abs().max() < 1e-4 - @slow @deprecate_after_peft_backend From a5e0951f8621c666f487cb2356dec59fa6088cf5 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 08:53:44 +0530 Subject: [PATCH 02/44] fix --- tests/lora/test_lora_layers_old_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 2b99655c74c0..cbd274d504d0 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -893,7 +893,7 @@ def get_dummy_components(self): text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin( + text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder_2, dtype=torch.float32, rank=self.lora_rank ) From 2f273ea6051bfecd58f6737e2896511b54c96acb Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:06:18 +0530 Subject: [PATCH 03/44] wrap into unet_lora_state_dict --- tests/lora/test_lora_layers_old_backend.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index cbd274d504d0..7f6989f8ae99 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -48,6 +48,7 @@ XFormersAttnProcessor, ) from diffusers.models.lora import LoRALinearLayer +from diffusers.training_utils import unet_lora_state_dict from diffusers.utils.import_utils import is_xformers_available from diffusers.utils.testing_utils import ( deprecate_after_peft_backend, @@ -144,7 +145,7 @@ def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=T unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) - return unet_lora_parameters + return unet_lora_state_dict(unet) # def create_text_encoder_lora_attn_procs(text_encoder: nn.Module): From 015e4a1f57fbaa3beaf023cfead83397e7fddfcc Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:11:08 +0530 Subject: [PATCH 04/44] utilize text_encoder_lora_params --- tests/lora/test_lora_layers_old_backend.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 7f6989f8ae99..f545f13580bd 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -40,7 +40,7 @@ UNet2DConditionModel, UNet3DConditionModel, ) -from diffusers.loaders import LoraLoaderMixin, StableDiffusionXLLoraLoaderMixin +from diffusers.loaders import LoraLoaderMixin, StableDiffusionXLLoraLoaderMixin, text_encoder_lora_state_dict from diffusers.models.attention_processor import ( 
Attention, AttnProcessor, @@ -276,6 +276,7 @@ def get_dummy_components(self): text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) + text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder_lora_params) pipeline_components = { "unet": unet, @@ -775,6 +776,7 @@ def get_dummy_components(self): text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) + text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder_lora_params) components = { "unet": unet, @@ -894,9 +896,11 @@ def get_dummy_components(self): text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) + text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder_lora_params) text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder_2, dtype=torch.float32, rank=self.lora_rank ) + text_encoder_two_lora_params = text_encoder_lora_state_dict(text_encoder_two_lora_params) pipeline_components = { "unet": unet, From 08289e1b828b85cc024e127dff4aab8f0a11970a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:13:51 +0530 Subject: [PATCH 05/44] utilize text_encoder_attn_modules --- tests/lora/test_lora_layers_old_backend.py | 35 +++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index f545f13580bd..44abfd26554f 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -40,7 +40,7 @@ UNet2DConditionModel, UNet3DConditionModel, ) -from diffusers.loaders import LoraLoaderMixin, StableDiffusionXLLoraLoaderMixin, text_encoder_lora_state_dict +from diffusers.loaders import LoraLoaderMixin, StableDiffusionXLLoraLoaderMixin from diffusers.models.attention_processor import ( Attention, AttnProcessor, @@ -88,6 +88,39 @@ # return lora_attn_procs +def text_encoder_attn_modules(text_encoder): + attn_modules = [] + + if isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection)): + for i, layer in enumerate(text_encoder.text_model.encoder.layers): + name = f"text_model.encoder.layers.{i}.self_attn" + mod = layer.self_attn + attn_modules.append((name, mod)) + else: + raise ValueError(f"do not know how to get attention modules for: {text_encoder.__class__.__name__}") + + return attn_modules + + +def text_encoder_lora_state_dict(text_encoder): + state_dict = {} + + for name, module in text_encoder_attn_modules(text_encoder): + for k, v in module.q_proj.lora_linear_layer.state_dict().items(): + state_dict[f"{name}.q_proj.lora_linear_layer.{k}"] = v + + for k, v in module.k_proj.lora_linear_layer.state_dict().items(): + state_dict[f"{name}.k_proj.lora_linear_layer.{k}"] = v + + for k, v in module.v_proj.lora_linear_layer.state_dict().items(): + state_dict[f"{name}.v_proj.lora_linear_layer.{k}"] = v + + for k, v in module.out_proj.lora_linear_layer.state_dict().items(): + state_dict[f"{name}.out_proj.lora_linear_layer.{k}"] = v + + return state_dict + + def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=True): unet_lora_parameters = [] in_features = None From 604936f385f5bd05cc225c78b9172d85e054c42a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:15:10 +0530 Subject: [PATCH 06/44] debug --- tests/lora/test_lora_layers_old_backend.py | 1 + 1 
file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 44abfd26554f..203842556eb7 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -90,6 +90,7 @@ def text_encoder_attn_modules(text_encoder): attn_modules = [] + print(f"text encoder type: {type(text_encoder)}") if isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection)): for i, layer in enumerate(text_encoder.text_model.encoder.layers): From 91c58887a283452ab0221f0446e9ce2a5d680681 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:23:20 +0530 Subject: [PATCH 07/44] debug --- tests/lora/test_lora_layers_old_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 203842556eb7..ef5669f1446a 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -310,7 +310,7 @@ def get_dummy_components(self): text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder_lora_params) + text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder) pipeline_components = { "unet": unet, @@ -810,7 +810,7 @@ def get_dummy_components(self): text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder_lora_params) + text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder) components = { "unet": unet, @@ -930,11 +930,11 @@ def get_dummy_components(self): text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder_lora_params) + text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder) text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder_2, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_two_lora_params = text_encoder_lora_state_dict(text_encoder_two_lora_params) + text_encoder_two_lora_params = text_encoder_lora_state_dict(text_encoder_2) pipeline_components = { "unet": unet, From 3f10766804f7f6e403e5e835c38b800270e3a3e2 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:23:48 +0530 Subject: [PATCH 08/44] remove print --- tests/lora/test_lora_layers_old_backend.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index ef5669f1446a..73ee4a1a9661 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -90,7 +90,6 @@ def text_encoder_attn_modules(text_encoder): attn_modules = [] - print(f"text encoder type: {type(text_encoder)}") if isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection)): for i, layer in enumerate(text_encoder.text_model.encoder.layers): From 4ce6112c532a654f06fb1f3dfb72ad9a3580819e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:30:35 +0530 Subject: [PATCH 09/44] don't use text encoder for test_stable_diffusion_lora --- tests/lora/test_lora_layers_old_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 73ee4a1a9661..f1bcb2bc0180 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -398,7 +398,7 @@ def test_stable_diffusion_lora(self): LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, unet_lora_layers=lora_components["unet_lora_params"], - text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_lora_layers=None, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(tmpdirname) From 7020cec27dde080f3e829272dccf8f3831a04b78 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:37:11 +0530 Subject: [PATCH 10/44] load the procs. --- tests/lora/test_lora_layers_old_backend.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index f1bcb2bc0180..19954c0ed19d 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -394,14 +394,15 @@ def test_stable_diffusion_lora(self): image_slice = image[0, -3:, -3:, -1] # set lora layers - with tempfile.TemporaryDirectory() as tmpdirname: - LoraLoaderMixin.save_lora_weights( - save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_params"], - text_encoder_lora_layers=None, - ) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - sd_pipe.load_lora_weights(tmpdirname) + sd_pipe.unet.load_attn_procs(lora_components["unet_lora_params"]) + # with tempfile.TemporaryDirectory() as tmpdirname: + # LoraLoaderMixin.save_lora_weights( + # save_directory=tmpdirname, + # unet_lora_layers=lora_components["unet_lora_params"], + # text_encoder_lora_layers=None, + # ) + # self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + # sd_pipe.load_lora_weights(tmpdirname) # forward 2 _, _, inputs = self.get_dummy_inputs() From a72aba288435d195234672daf4cd71113585929f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:42:32 +0530 Subject: [PATCH 11/44] set_default_attn_processor --- tests/lora/test_lora_layers_old_backend.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 19954c0ed19d..f6fc57d5bb94 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -122,7 +122,6 @@ def text_encoder_lora_state_dict(text_encoder): def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=True): - unet_lora_parameters = [] in_features = None for attn_processor_name, attn_processor in unet.attn_processors.items(): @@ -172,12 +171,6 @@ def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=T attn_module.to_v.lora_layer.up.weight += 1 attn_module.to_out[0].lora_layer.up.weight += 1 - # Accumulate the LoRA params to optimize. 
- unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) - return unet_lora_state_dict(unet) @@ -385,6 +378,7 @@ def test_stable_diffusion_lora(self): sd_pipe = StableDiffusionPipeline(**components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) + sd_pipe.set_default_attn_processor() # forward 1 _, _, inputs = self.get_dummy_inputs() @@ -395,14 +389,6 @@ def test_stable_diffusion_lora(self): # set lora layers sd_pipe.unet.load_attn_procs(lora_components["unet_lora_params"]) - # with tempfile.TemporaryDirectory() as tmpdirname: - # LoraLoaderMixin.save_lora_weights( - # save_directory=tmpdirname, - # unet_lora_layers=lora_components["unet_lora_params"], - # text_encoder_lora_layers=None, - # ) - # self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - # sd_pipe.load_lora_weights(tmpdirname) # forward 2 _, _, inputs = self.get_dummy_inputs() From bdb2f6bac764583fc5c0b1f3c0c09e022f79036f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:44:53 +0530 Subject: [PATCH 12/44] fix: set_default_attn_processor call. --- tests/lora/test_lora_layers_old_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index f6fc57d5bb94..69108793ed8c 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -378,7 +378,7 @@ def test_stable_diffusion_lora(self): sd_pipe = StableDiffusionPipeline(**components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) - sd_pipe.set_default_attn_processor() + sd_pipe.unet.set_default_attn_processor() # forward 1 _, _, inputs = self.get_dummy_inputs() From 3a35ceb565515ea1d72733ce3bd979b030c2acf8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:46:48 +0530 Subject: [PATCH 13/44] fix: lora_components[unet_lora_params] --- tests/lora/test_lora_layers_old_backend.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 69108793ed8c..bb3e7ea597d4 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -818,6 +818,7 @@ def test_stable_diffusion_inpaint_lora(self): sd_pipe = StableDiffusionInpaintPipeline(**components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) + sd_pipe.unet.set_default_attn_processor() # forward 1 inputs = self.get_dummy_inputs(device) @@ -826,14 +827,7 @@ def test_stable_diffusion_inpaint_lora(self): image_slice = image[0, -3:, -3:, -1] # set lora layers - with tempfile.TemporaryDirectory() as tmpdirname: - LoraLoaderMixin.save_lora_weights( - save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_params"], - text_encoder_lora_layers=lora_components["text_encoder_lora_params"], - ) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - sd_pipe.load_lora_weights(tmpdirname) + sd_pipe.unet.load_attn_procs(lora_components["unet_lora_params"]) # forward 2 inputs = self.get_dummy_inputs(device) From 18af8e791903684bca01a9c715f38bc946a84ca3 Mon Sep 17 00:00:00 2001 From: 
sayakpaul Date: Thu, 28 Dec 2023 09:59:33 +0530 Subject: [PATCH 14/44] checking for 3d. --- tests/lora/test_lora_layers_old_backend.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index bb3e7ea597d4..dd7e213687e5 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -123,11 +123,16 @@ def text_encoder_lora_state_dict(text_encoder): def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=True): in_features = None + out_features = None for attn_processor_name, attn_processor in unet.attn_processors.items(): if is_3d and attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 in_features = 8 * unet.config.attention_head_dim + has_cross_attention = attn_processor_name.endswith("attn2.processor") and not ( + attn_processor_name.startswith("transformer_in") or "temp_attentions" in attn_processor_name.split(".") + ) + out_features = unet.config.cross_attention_dim if has_cross_attention else None # Parse the attention module. attn_module = unet @@ -138,28 +143,28 @@ def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=T attn_module.to_q.set_lora_layer( LoRALinearLayer( in_features=attn_module.to_q.in_features if in_features is None else in_features, - out_features=attn_module.to_q.out_features, + out_features=attn_module.to_q.out_features if out_features is None else out_features, rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( in_features=attn_module.to_k.in_features if in_features is None else in_features, - out_features=attn_module.to_k.out_features, + out_features=attn_module.to_k.out_features if out_features is None else out_features, rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( in_features=attn_module.to_v.in_features if in_features is None else in_features, - out_features=attn_module.to_v.out_features, + out_features=attn_module.to_v.out_features if out_features is None else out_features, rank=rank, ) ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( in_features=attn_module.to_out[0].in_features if in_features is None else in_features, - out_features=attn_module.to_out[0].out_features, + out_features=attn_module.to_out[0].out_features if out_features is None else out_features, rank=rank, ) ) From 5b70ddbb2b18b63e060258ccf2579ae04eb3d8d6 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:05:13 +0530 Subject: [PATCH 15/44] 3d. --- tests/lora/test_lora_layers_old_backend.py | 91 +++++++++++++++++----- 1 file changed, 73 insertions(+), 18 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index dd7e213687e5..130784e5f790 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -121,18 +121,71 @@ def text_encoder_lora_state_dict(text_encoder): return state_dict -def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=True): - in_features = None - out_features = None +def create_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): + for attn_processor_name, attn_processor in unet.attn_processors.items(): + # Parse the attention module. 
+ attn_module = unet + for n in attn_processor_name.split(".")[:-1]: + attn_module = getattr(attn_module, n) + # Set the `lora_layer` attribute of the attention-related matrices. + attn_module.to_q.set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_q.in_features, + out_features=attn_module.to_q.out_features, + rank=rank, + ) + ) + attn_module.to_k.set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_k.in_features, + out_features=attn_module.to_k.out_features, + rank=rank, + ) + ) + attn_module.to_v.set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_v.in_features, + out_features=attn_module.to_v.out_features, + rank=rank, + ) + ) + attn_module.to_out[0].set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_out[0].in_features, + out_features=attn_module.to_out[0].out_features, + rank=rank, + ) + ) + + if mock_weights: + with torch.no_grad(): + attn_module.to_q.lora_layer.up.weight += 1 + attn_module.to_k.lora_layer.up.weight += 1 + attn_module.to_v.lora_layer.up.weight += 1 + attn_module.to_out[0].lora_layer.up.weight += 1 + + return unet_lora_state_dict(unet) + + +def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): for attn_processor_name, attn_processor in unet.attn_processors.items(): - if is_3d and attn_processor_name.startswith("transformer_in"): + has_cross_attention = attn_processor_name.endswith("attn2.processor") and not ( + attn_processor_name.startswith("transformer_in") or "temp_attentions" in attn_processor_name.split(".") + ) + cross_attention_dim = unet.config.cross_attention_dim if has_cross_attention else None + + if attn_processor_name.startswith("mid_block"): + hidden_size = unet.config.block_out_channels[-1] + elif attn_processor_name.startswith("up_blocks"): + block_id = int(attn_processor_name[len("up_blocks.")]) + hidden_size = list(reversed(unet.config.block_out_channels))[block_id] + elif attn_processor_name.startswith("down_blocks"): + block_id = int(attn_processor_name[len("down_blocks.")]) + hidden_size = unet.config.block_out_channels[block_id] + elif attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 - in_features = 8 * unet.config.attention_head_dim - has_cross_attention = attn_processor_name.endswith("attn2.processor") and not ( - attn_processor_name.startswith("transformer_in") or "temp_attentions" in attn_processor_name.split(".") - ) - out_features = unet.config.cross_attention_dim if has_cross_attention else None + hidden_size = 8 * unet.config.attention_head_dim # Parse the attention module. attn_module = unet @@ -142,29 +195,31 @@ def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=T # Set the `lora_layer` attribute of the attention-related matrices. 
attn_module.to_q.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_q.in_features if in_features is None else in_features, - out_features=attn_module.to_q.out_features if out_features is None else out_features, + in_features=hidden_size, + out_features=attn_module.to_q.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_k.in_features if in_features is None else in_features, - out_features=attn_module.to_k.out_features if out_features is None else out_features, + in_features=attn_module.to_k.in_features, + out_features=attn_module.to_k.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_v.in_features if in_features is None else in_features, - out_features=attn_module.to_v.out_features if out_features is None else out_features, + in_features=attn_module.to_v.in_features, + out_features=attn_module.to_v.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_out[0].in_features if in_features is None else in_features, - out_features=attn_module.to_out[0].out_features if out_features is None else out_features, + in_features=attn_module.to_out[0].in_features, + out_features=attn_module.to_out[0].out_features + if cross_attention_dim is None + else cross_attention_dim, rank=rank, ) ) @@ -1643,7 +1698,7 @@ def test_lora_processors(self): with torch.no_grad(): sample1 = model(**inputs_dict).sample - unet_lora_params = create_unet_lora_layers(model, is_3d=True) + unet_lora_params = create_3d_unet_lora_layers(model) # make sure we can set a list of attention processors model.load_attn_procs(unet_lora_params) From 5d006d4801ac8fea663f57b1734e467951b7d8b1 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:07:06 +0530 Subject: [PATCH 16/44] more fixes. 
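Apply the same change to the k/v/out projections so every LoRA layer built for the 3D UNet uses the resolved `hidden_size`. For orientation, the model tests consume this helper as follows (sketch only; `model` is the tiny `UNet3DConditionModel` built from `prepare_init_args_and_inputs_for_common`, and the exact feature sizes are still being adjusted in the commits that follow):

    # Attach LoRA layers to the 3D UNet and get back a plain state dict.
    unet_lora_params = create_3d_unet_lora_layers(model)
    # The old backend reloads that state dict through `load_attn_procs`.
    model.load_attn_procs(unet_lora_params)
    with torch.no_grad():
        sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample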
--- tests/lora/test_lora_layers_old_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 130784e5f790..92b56235104e 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -202,22 +202,22 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_k.in_features, + in_features=hidden_size, out_features=attn_module.to_k.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_v.in_features, + in_features=hidden_size, out_features=attn_module.to_v.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_out[0].in_features, - out_features=attn_module.to_out[0].out_features + in_features=hidden_size, + out_features=attn_module.to_out[0].out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, From df47cc4af00e7324c236d526fbaf307aeb167205 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:11:49 +0530 Subject: [PATCH 17/44] debug --- tests/lora/test_lora_layers_old_backend.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 92b56235104e..2be049008571 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -169,7 +169,7 @@ def create_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): - for attn_processor_name, attn_processor in unet.attn_processors.items(): + for attn_processor_name in unet.attn_processors.keys(): has_cross_attention = attn_processor_name.endswith("attn2.processor") and not ( attn_processor_name.startswith("transformer_in") or "temp_attentions" in attn_processor_name.split(".") ) @@ -193,6 +193,10 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_module = getattr(attn_module, n) # Set the `lora_layer` attribute of the attention-related matrices. 
+ print(f"Hidden size: {hidden_size} in_features (q): {attn_module.to_q.in_features}") + print(f"Hidden size: {hidden_size} in_features (k): {attn_module.to_k.in_features}") + print(f"Hidden size: {hidden_size} in_features (v): {attn_module.to_v.in_features}") + print(f"Hidden size: {hidden_size} in_features (out): {attn_module.to_out[0].in_features}") attn_module.to_q.set_lora_layer( LoRALinearLayer( in_features=hidden_size, From 01bd812f89bfc5c07c310b4f6e8c2e87d76cac2e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:15:31 +0530 Subject: [PATCH 18/44] debug --- tests/lora/test_lora_layers_old_backend.py | 29 +++++++++++----------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 2be049008571..fe5fb74b1832 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -174,16 +174,17 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_processor_name.startswith("transformer_in") or "temp_attentions" in attn_processor_name.split(".") ) cross_attention_dim = unet.config.cross_attention_dim if has_cross_attention else None - - if attn_processor_name.startswith("mid_block"): - hidden_size = unet.config.block_out_channels[-1] - elif attn_processor_name.startswith("up_blocks"): - block_id = int(attn_processor_name[len("up_blocks.")]) - hidden_size = list(reversed(unet.config.block_out_channels))[block_id] - elif attn_processor_name.startswith("down_blocks"): - block_id = int(attn_processor_name[len("down_blocks.")]) - hidden_size = unet.config.block_out_channels[block_id] - elif attn_processor_name.startswith("transformer_in"): + hidden_size = None + + # if attn_processor_name.startswith("mid_block"): + # hidden_size = unet.config.block_out_channels[-1] + # elif attn_processor_name.startswith("up_blocks"): + # block_id = int(attn_processor_name[len("up_blocks.")]) + # hidden_size = list(reversed(unet.config.block_out_channels))[block_id] + # elif attn_processor_name.startswith("down_blocks"): + # block_id = int(attn_processor_name[len("down_blocks.")]) + # hidden_size = unet.config.block_out_channels[block_id] + if attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 hidden_size = 8 * unet.config.attention_head_dim @@ -199,28 +200,28 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): print(f"Hidden size: {hidden_size} in_features (out): {attn_module.to_out[0].in_features}") attn_module.to_q.set_lora_layer( LoRALinearLayer( - in_features=hidden_size, + in_features=attn_module.to_q.in_features if hidden_size is None else hidden_size, out_features=attn_module.to_q.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=hidden_size, + in_features=attn_module.to_k.in_features if hidden_size is None else hidden_size, out_features=attn_module.to_k.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=hidden_size, + in_features=attn_module.to_v.in_features if hidden_size is None else hidden_size, out_features=attn_module.to_v.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) 
attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - in_features=hidden_size, + in_features=attn_module.to_out[0].in_features if hidden_size is None else hidden_size, out_features=attn_module.to_out[0].out_features if cross_attention_dim is None else cross_attention_dim, From a4a46f01017785b057eaa864a279b80a75dee94f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:17:34 +0530 Subject: [PATCH 19/44] debug --- tests/lora/test_lora_layers_old_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index fe5fb74b1832..7707bfdea268 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -186,6 +186,7 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): # hidden_size = unet.config.block_out_channels[block_id] if attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 + print("Within transformer_in") hidden_size = 8 * unet.config.attention_head_dim # Parse the attention module. From 7d70f37ded680fb84ee802eafedf0d02edbb2cef Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:19:28 +0530 Subject: [PATCH 20/44] debug --- tests/lora/test_lora_layers_old_backend.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 7707bfdea268..e78eb5242737 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -174,7 +174,6 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_processor_name.startswith("transformer_in") or "temp_attentions" in attn_processor_name.split(".") ) cross_attention_dim = unet.config.cross_attention_dim if has_cross_attention else None - hidden_size = None # if attn_processor_name.startswith("mid_block"): # hidden_size = unet.config.block_out_channels[-1] @@ -201,29 +200,37 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): print(f"Hidden size: {hidden_size} in_features (out): {attn_module.to_out[0].in_features}") attn_module.to_q.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_q.in_features if hidden_size is None else hidden_size, + in_features=attn_module.to_q.in_features + if not attn_processor_name.startswith("transformer_in") + else hidden_size, out_features=attn_module.to_q.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_k.in_features if hidden_size is None else hidden_size, + in_features=attn_module.to_k.in_features + if not attn_processor_name.startswith("transformer_in") + else hidden_size, out_features=attn_module.to_k.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_v.in_features if hidden_size is None else hidden_size, + in_features=attn_module.to_v.in_features + if not attn_processor_name.startswith("transformer_in") + else hidden_size, out_features=attn_module.to_v.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - 
in_features=attn_module.to_out[0].in_features if hidden_size is None else hidden_size, - out_features=attn_module.to_out[0].out_features + in_features=attn_module.to_out[0].in_features + if not attn_processor_name.startswith("transformer_in") + else hidden_size, + out_features=attn_module.to_out[0].out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, From fe668cdf05b8bad29dbf646c35d5a458fea05028 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:23:32 +0530 Subject: [PATCH 21/44] more debug --- tests/lora/test_lora_layers_old_backend.py | 35 +++++++++------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index e78eb5242737..b39218580562 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -175,15 +175,15 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): ) cross_attention_dim = unet.config.cross_attention_dim if has_cross_attention else None - # if attn_processor_name.startswith("mid_block"): - # hidden_size = unet.config.block_out_channels[-1] - # elif attn_processor_name.startswith("up_blocks"): - # block_id = int(attn_processor_name[len("up_blocks.")]) - # hidden_size = list(reversed(unet.config.block_out_channels))[block_id] - # elif attn_processor_name.startswith("down_blocks"): - # block_id = int(attn_processor_name[len("down_blocks.")]) - # hidden_size = unet.config.block_out_channels[block_id] - if attn_processor_name.startswith("transformer_in"): + if attn_processor_name.startswith("mid_block"): + hidden_size = unet.config.block_out_channels[-1] + elif attn_processor_name.startswith("up_blocks"): + block_id = int(attn_processor_name[len("up_blocks.")]) + hidden_size = list(reversed(unet.config.block_out_channels))[block_id] + elif attn_processor_name.startswith("down_blocks"): + block_id = int(attn_processor_name[len("down_blocks.")]) + hidden_size = unet.config.block_out_channels[block_id] + elif attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 print("Within transformer_in") hidden_size = 8 * unet.config.attention_head_dim @@ -198,38 +198,31 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): print(f"Hidden size: {hidden_size} in_features (k): {attn_module.to_k.in_features}") print(f"Hidden size: {hidden_size} in_features (v): {attn_module.to_v.in_features}") print(f"Hidden size: {hidden_size} in_features (out): {attn_module.to_out[0].in_features}") + attn_module.to_q.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_q.in_features - if not attn_processor_name.startswith("transformer_in") - else hidden_size, + in_features=hidden_size, out_features=attn_module.to_q.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_k.in_features - if not attn_processor_name.startswith("transformer_in") - else hidden_size, + in_features=hidden_size, out_features=attn_module.to_k.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_v.in_features - if not attn_processor_name.startswith("transformer_in") - else hidden_size, + 
in_features=hidden_size, out_features=attn_module.to_v.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_out[0].in_features - if not attn_processor_name.startswith("transformer_in") - else hidden_size, + in_features=hidden_size, out_features=attn_module.to_out[0].out_features if cross_attention_dim is None else cross_attention_dim, From 2b097de27b7fa29ead3edb5f1849b0cdb444ce1f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:25:47 +0530 Subject: [PATCH 22/44] more debug --- tests/lora/test_lora_layers_old_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index b39218580562..285c412c3b4f 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -194,10 +194,10 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_module = getattr(attn_module, n) # Set the `lora_layer` attribute of the attention-related matrices. - print(f"Hidden size: {hidden_size} in_features (q): {attn_module.to_q.in_features}") - print(f"Hidden size: {hidden_size} in_features (k): {attn_module.to_k.in_features}") - print(f"Hidden size: {hidden_size} in_features (v): {attn_module.to_v.in_features}") - print(f"Hidden size: {hidden_size} in_features (out): {attn_module.to_out[0].in_features}") + print(f"Hidden size != attn_module.to_q.in_features: {hidden_size != attn_module.to_q.in_features}") + print(f"Hidden size != to_k.in_features: {hidden_size != attn_module.to_k.in_features}") + print(f"Hidden size != attn_module.to_v.in_features: {hidden_size != attn_module.to_v.in_features}") + print(f"Hidden size != attn_module.to_out[0].in_features: {hidden_size != attn_module.to_out[0].in_features}") attn_module.to_q.set_lora_layer( LoRALinearLayer( From 940a4a0aa55760049f300c519ac38cb25cd66e51 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:30:16 +0530 Subject: [PATCH 23/44] more debug --- src/diffusers/models/modeling_utils.py | 3 ++- tests/lora/test_lora_layers_old_backend.py | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 546c5b20f937..008d687090bf 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -148,7 +148,8 @@ def load_model_dict_into_meta( if param_name not in empty_state_dict: unexpected_keys.append(param_name) continue - + + print(f"load_model_dict_into_meta param_name {param_name}") if empty_state_dict[param_name].shape != param.shape: model_name_or_path_str = f"{model_name_or_path} " if model_name_or_path is not None else "" raise ValueError( diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 285c412c3b4f..2b92130d4663 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -194,10 +194,10 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_module = getattr(attn_module, n) # Set the `lora_layer` attribute of the attention-related matrices. 
- print(f"Hidden size != attn_module.to_q.in_features: {hidden_size != attn_module.to_q.in_features}") - print(f"Hidden size != to_k.in_features: {hidden_size != attn_module.to_k.in_features}") - print(f"Hidden size != attn_module.to_v.in_features: {hidden_size != attn_module.to_v.in_features}") - print(f"Hidden size != attn_module.to_out[0].in_features: {hidden_size != attn_module.to_out[0].in_features}") + # print(f"Hidden size != attn_module.to_q.in_features: {hidden_size != attn_module.to_q.in_features}") + # print(f"Hidden size != attn_module.to_k.in_features: {hidden_size != attn_module.to_k.in_features}") + # print(f"Hidden size != attn_module.to_v.in_features: {hidden_size != attn_module.to_v.in_features}") + # print(f"Hidden size != attn_module.to_out[0].in_features: {hidden_size != attn_module.to_out[0].in_features}") attn_module.to_q.set_lora_layer( LoRALinearLayer( From 3b38d0cc1f2ab983b950511b16e252bc72174841 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:30:53 +0530 Subject: [PATCH 24/44] more debug --- src/diffusers/models/modeling_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 008d687090bf..f0d802e7cd7e 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -149,8 +149,8 @@ def load_model_dict_into_meta( unexpected_keys.append(param_name) continue - print(f"load_model_dict_into_meta param_name {param_name}") if empty_state_dict[param_name].shape != param.shape: + print(f"load_model_dict_into_meta param_name {param_name}") model_name_or_path_str = f"{model_name_or_path} " if model_name_or_path is not None else "" raise ValueError( f"Cannot load {model_name_or_path_str}because {param_name} expected shape {empty_state_dict[param_name]}, but got {param.shape}. If you want to instead overwrite randomly initialized weights, please make sure to pass both `low_cpu_mem_usage=False` and `ignore_mismatched_sizes=True`. For more information, see also: https://github.com/huggingface/diffusers/issues/1619#issuecomment-1345604389 as an example." 
From 3c05c1078f13dc9defb761450c70cad21504e650 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:33:13 +0530 Subject: [PATCH 25/44] more debug --- tests/lora/test_lora_layers_old_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 2b92130d4663..92076594e182 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -202,21 +202,21 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_module.to_q.set_lora_layer( LoRALinearLayer( in_features=hidden_size, - out_features=attn_module.to_q.out_features if cross_attention_dim is None else cross_attention_dim, + out_features=attn_module.to_q.out_features if cross_attention_dim is None else max(attn_module.to_q.out_features, cross_attention_dim), rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( in_features=hidden_size, - out_features=attn_module.to_k.out_features if cross_attention_dim is None else cross_attention_dim, + out_features=attn_module.to_k.out_features if cross_attention_dim is None else max(attn_module.to_k.out_features, cross_attention_dim), rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( in_features=hidden_size, - out_features=attn_module.to_v.out_features if cross_attention_dim is None else cross_attention_dim, + out_features=attn_module.to_v.out_features if cross_attention_dim is None else max(attn_module.to_v.out_features, cross_attention_dim), rank=rank, ) ) @@ -225,7 +225,7 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): in_features=hidden_size, out_features=attn_module.to_out[0].out_features if cross_attention_dim is None - else cross_attention_dim, + else max(attn_module.to_out[0].out_features, cross_attention_dim), rank=rank, ) ) From 620df7dfe27084723a0cf2f5ad4e3a3314c54017 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:37:14 +0530 Subject: [PATCH 26/44] more debug --- src/diffusers/models/modeling_utils.py | 2 +- tests/lora/test_lora_layers_old_backend.py | 23 +++++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index f0d802e7cd7e..5714d027d484 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -148,7 +148,7 @@ def load_model_dict_into_meta( if param_name not in empty_state_dict: unexpected_keys.append(param_name) continue - + if empty_state_dict[param_name].shape != param.shape: print(f"load_model_dict_into_meta param_name {param_name}") model_name_or_path_str = f"{model_name_or_path} " if model_name_or_path is not None else "" diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 92076594e182..3f7be59781aa 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -185,7 +185,6 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): hidden_size = unet.config.block_out_channels[block_id] elif attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 - print("Within transformer_in") hidden_size = 8 * unet.config.attention_head_dim # Parse the attention module. 
@@ -198,31 +197,37 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): # print(f"Hidden size != attn_module.to_k.in_features: {hidden_size != attn_module.to_k.in_features}") # print(f"Hidden size != attn_module.to_v.in_features: {hidden_size != attn_module.to_v.in_features}") # print(f"Hidden size != attn_module.to_out[0].in_features: {hidden_size != attn_module.to_out[0].in_features}") - + attn_module.to_q.set_lora_layer( LoRALinearLayer( - in_features=hidden_size, - out_features=attn_module.to_q.out_features if cross_attention_dim is None else max(attn_module.to_q.out_features, cross_attention_dim), + in_features=max(attn_module.to_q.in_features, hidden_size), + out_features=attn_module.to_q.out_features + if cross_attention_dim is None + else max(attn_module.to_q.out_features, cross_attention_dim), rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=hidden_size, - out_features=attn_module.to_k.out_features if cross_attention_dim is None else max(attn_module.to_k.out_features, cross_attention_dim), + in_features=max(attn_module.to_k.in_features, hidden_size), + out_features=attn_module.to_k.out_features + if cross_attention_dim is None + else max(attn_module.to_k.out_features, cross_attention_dim), rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=hidden_size, - out_features=attn_module.to_v.out_features if cross_attention_dim is None else max(attn_module.to_v.out_features, cross_attention_dim), + in_features=max(attn_module.to_v.in_features, hidden_size), + out_features=attn_module.to_v.out_features + if cross_attention_dim is None + else max(attn_module.to_v.out_features, cross_attention_dim), rank=rank, ) ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - in_features=hidden_size, + in_features=max(attn_module.to_out[0].in_features, hidden_size), out_features=attn_module.to_out[0].out_features if cross_attention_dim is None else max(attn_module.to_out[0].out_features, cross_attention_dim), From 598667c49865e8a2edab0a16ed3a2a0ab5311030 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:37:44 +0530 Subject: [PATCH 27/44] hack. 
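Clamp the LoRA feature sizes against what the wrapped projection actually exposes instead of overriding them outright; for the tiny 3D UNet used in these tests the derived `hidden_size` does not always match the projection's own `in_features`. Per projection this amounts to the following (excerpt mirroring the diff below, shown for `to_q` only):

    in_features = min(attn_module.to_q.in_features, hidden_size)
    out_features = (
        attn_module.to_q.out_features
        if cross_attention_dim is None
        else max(attn_module.to_q.out_features, cross_attention_dim)
    )

Treat this as a test-only workaround for the toy configuration, not as a general rule for LoRA shapes.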
--- tests/lora/test_lora_layers_old_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 3f7be59781aa..1b50f0ed30fa 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -200,7 +200,7 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_module.to_q.set_lora_layer( LoRALinearLayer( - in_features=max(attn_module.to_q.in_features, hidden_size), + in_features=min(attn_module.to_q.in_features, hidden_size), out_features=attn_module.to_q.out_features if cross_attention_dim is None else max(attn_module.to_q.out_features, cross_attention_dim), @@ -209,7 +209,7 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=max(attn_module.to_k.in_features, hidden_size), + in_features=min(attn_module.to_k.in_features, hidden_size), out_features=attn_module.to_k.out_features if cross_attention_dim is None else max(attn_module.to_k.out_features, cross_attention_dim), @@ -218,7 +218,7 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=max(attn_module.to_v.in_features, hidden_size), + in_features=min(attn_module.to_v.in_features, hidden_size), out_features=attn_module.to_v.out_features if cross_attention_dim is None else max(attn_module.to_v.out_features, cross_attention_dim), @@ -227,7 +227,7 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - in_features=max(attn_module.to_out[0].in_features, hidden_size), + in_features=min(attn_module.to_out[0].in_features, hidden_size), out_features=attn_module.to_out[0].out_features if cross_attention_dim is None else max(attn_module.to_out[0].out_features, cross_attention_dim), From 60c524240393d48c8d8069ef30cd7c455ef5b7f8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:38:51 +0530 Subject: [PATCH 28/44] remove comments and prep for a PR. --- src/diffusers/models/modeling_utils.py | 1 - tests/lora/test_lora_layers_old_backend.py | 89 ---------------------- 2 files changed, 90 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 5714d027d484..546c5b20f937 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -150,7 +150,6 @@ def load_model_dict_into_meta( continue if empty_state_dict[param_name].shape != param.shape: - print(f"load_model_dict_into_meta param_name {param_name}") model_name_or_path_str = f"{model_name_or_path} " if model_name_or_path is not None else "" raise ValueError( f"Cannot load {model_name_or_path_str}because {param_name} expected shape {empty_state_dict[param_name]}, but got {param.shape}. If you want to instead overwrite randomly initialized weights, please make sure to pass both `low_cpu_mem_usage=False` and `ignore_mismatched_sizes=True`. For more information, see also: https://github.com/huggingface/diffusers/issues/1619#issuecomment-1345604389 as an example." 
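With the commented-out helpers and debug prints gone, the text-encoder side of these tests reduces to the following flow (sketch; the helper names are the ones defined earlier in this file, and `rank=4` mirrors the default used by the UNet helpers):

    # Monkey-patch LoRA layers into the CLIP text encoder (old backend),
    # then collect them into a plain state dict for serialization.
    LoraLoaderMixin._modify_text_encoder(text_encoder, dtype=torch.float32, rank=4)
    text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder)

    # Pipelines later save and reload the UNet and text-encoder dicts together.
    LoraLoaderMixin.save_lora_weights(
        save_directory=tmpdirname,
        unet_lora_layers=unet_lora_params,
        text_encoder_lora_layers=text_encoder_lora_params,
    )
    sd_pipe.load_lora_weights(tmpdirname)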
diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 1b50f0ed30fa..492c938ecd51 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -61,33 +61,6 @@ ) -# def create_lora_layers(model, mock_weights: bool = True): -# lora_attn_procs = {} -# for name in model.attn_processors.keys(): -# cross_attention_dim = None if name.endswith("attn1.processor") else model.config.cross_attention_dim -# if name.startswith("mid_block"): -# hidden_size = model.config.block_out_channels[-1] -# elif name.startswith("up_blocks"): -# block_id = int(name[len("up_blocks.")]) -# hidden_size = list(reversed(model.config.block_out_channels))[block_id] -# elif name.startswith("down_blocks"): -# block_id = int(name[len("down_blocks.")]) -# hidden_size = model.config.block_out_channels[block_id] - -# lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim) -# lora_attn_procs[name] = lora_attn_procs[name].to(model.device) - -# if mock_weights: -# # add 1 to weights to mock trained weights -# with torch.no_grad(): -# lora_attn_procs[name].to_q_lora.up.weight += 1 -# lora_attn_procs[name].to_k_lora.up.weight += 1 -# lora_attn_procs[name].to_v_lora.up.weight += 1 -# lora_attn_procs[name].to_out_lora.up.weight += 1 - -# return lora_attn_procs - - def text_encoder_attn_modules(text_encoder): attn_modules = [] @@ -192,12 +165,6 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): for n in attn_processor_name.split(".")[:-1]: attn_module = getattr(attn_module, n) - # Set the `lora_layer` attribute of the attention-related matrices. - # print(f"Hidden size != attn_module.to_q.in_features: {hidden_size != attn_module.to_q.in_features}") - # print(f"Hidden size != attn_module.to_k.in_features: {hidden_size != attn_module.to_k.in_features}") - # print(f"Hidden size != attn_module.to_v.in_features: {hidden_size != attn_module.to_v.in_features}") - # print(f"Hidden size != attn_module.to_out[0].in_features: {hidden_size != attn_module.to_out[0].in_features}") - attn_module.to_q.set_lora_layer( LoRALinearLayer( in_features=min(attn_module.to_q.in_features, hidden_size), @@ -245,62 +212,6 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): return unet_lora_state_dict(unet) -# def create_text_encoder_lora_attn_procs(text_encoder: nn.Module): -# text_lora_attn_procs = {} -# lora_attn_processor_class = ( -# LoRAAttnProcessor2_0 if hasattr(F, "scaled_dot_product_attention") else LoRAAttnProcessor -# ) -# for name, module in text_encoder_attn_modules(text_encoder): -# if isinstance(module.out_proj, nn.Linear): -# out_features = module.out_proj.out_features -# elif isinstance(module.out_proj, PatchedLoraProjection): -# out_features = module.out_proj.regular_linear_layer.out_features -# else: -# assert False, module.out_proj.__class__ - -# text_lora_attn_procs[name] = lora_attn_processor_class(hidden_size=out_features, cross_attention_dim=None) -# return text_lora_attn_procs - - -# def create_text_encoder_lora_layers(text_encoder: nn.Module): -# text_lora_attn_procs = create_text_encoder_lora_attn_procs(text_encoder) -# text_encoder_lora_layers = AttnProcsLayers(text_lora_attn_procs) -# return text_encoder_lora_layers - - -# def create_lora_3d_layers(model, mock_weights: bool = True): -# lora_attn_procs = {} -# for name in model.attn_processors.keys(): -# has_cross_attention = name.endswith("attn2.processor") and not ( -# 
name.startswith("transformer_in") or "temp_attentions" in name.split(".") -# ) -# cross_attention_dim = model.config.cross_attention_dim if has_cross_attention else None -# if name.startswith("mid_block"): -# hidden_size = model.config.block_out_channels[-1] -# elif name.startswith("up_blocks"): -# block_id = int(name[len("up_blocks.")]) -# hidden_size = list(reversed(model.config.block_out_channels))[block_id] -# elif name.startswith("down_blocks"): -# block_id = int(name[len("down_blocks.")]) -# hidden_size = model.config.block_out_channels[block_id] -# elif name.startswith("transformer_in"): -# # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 -# hidden_size = 8 * model.config.attention_head_dim - -# lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim) -# lora_attn_procs[name] = lora_attn_procs[name].to(model.device) - -# if mock_weights: -# # add 1 to weights to mock trained weights -# with torch.no_grad(): -# lora_attn_procs[name].to_q_lora.up.weight += 1 -# lora_attn_procs[name].to_k_lora.up.weight += 1 -# lora_attn_procs[name].to_v_lora.up.weight += 1 -# lora_attn_procs[name].to_out_lora.up.weight += 1 - -# return lora_attn_procs - - def set_lora_weights(lora_attn_parameters, randn_weight=False, var=1.0): with torch.no_grad(): for parameter in lora_attn_parameters: From 22296683fe7bcd4b56a4fd257a687d027597c8ba Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:05:30 +0530 Subject: [PATCH 29/44] appropriate set_lora_weights() --- tests/lora/test_lora_layers_old_backend.py | 135 ++++++++++++++------- 1 file changed, 91 insertions(+), 44 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 492c938ecd51..6c6d143382c1 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -95,6 +95,8 @@ def text_encoder_lora_state_dict(text_encoder): def create_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): + unet_lora_parameters = [] + for attn_processor_name, attn_processor in unet.attn_processors.items(): # Parse the attention module. 
attn_module = unet @@ -138,7 +140,12 @@ def create_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_module.to_v.lora_layer.up.weight += 1 attn_module.to_out[0].lora_layer.up.weight += 1 - return unet_lora_state_dict(unet) + unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) + unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) + unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) + unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) + + return unet_lora_parameters, unet_lora_state_dict(unet) def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): @@ -213,12 +220,16 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): def set_lora_weights(lora_attn_parameters, randn_weight=False, var=1.0): - with torch.no_grad(): - for parameter in lora_attn_parameters: - if randn_weight: - parameter[:] = torch.randn_like(parameter) * var - else: - torch.zero_(parameter) + if not isinstance(lora_attn_parameters, dict): + with torch.no_grad(): + for parameter in lora_attn_parameters: + if randn_weight: + parameter[:] = torch.randn_like(parameter) * var + else: + torch.zero_(parameter) + else: + modified_state_dict = {k: torch.rand_like(v) * var for k, v in lora_attn_parameters.items()} + return modified_state_dict def state_dicts_almost_equal(sd1, sd2): @@ -280,7 +291,7 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(text_encoder_config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) + unet_lora_raw_params, unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) @@ -296,7 +307,11 @@ def get_dummy_components(self): "feature_extractor": None, "image_encoder": None, } - lora_components = {"unet_lora_params": unet_lora_params, "text_encoder_lora_params": text_encoder_lora_params} + lora_components = { + "unet_lora_raw_params": unet_lora_raw_params, + "unet_lora_params": unet_lora_params, + "text_encoder_lora_params": text_encoder_lora_params, + } return pipeline_components, lora_components def get_dummy_inputs(self, with_generator=True): @@ -599,8 +614,9 @@ def test_unload_lora_sd(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_layers"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_layers"] = set_lora_weights( + lora_components["text_encoder_lora_layers"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( @@ -774,7 +790,7 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(text_encoder_config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) + unet_lora_raw_params, unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) @@ -790,7 +806,11 @@ def get_dummy_components(self): "feature_extractor": None, "image_encoder": None, } - lora_components = {"unet_lora_params": unet_lora_params, "text_encoder_lora_params": text_encoder_lora_params} + lora_components = { + "unet_lora_raw_params": unet_lora_raw_params, + "unet_lora_params": unet_lora_params, + "text_encoder_lora_params": text_encoder_lora_params, + } return components, lora_components def test_stable_diffusion_inpaint_lora(self): @@ -888,7 +908,7 @@ def get_dummy_components(self): text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config) tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) + _, unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) @@ -973,9 +993,12 @@ def test_unload_lora_sdxl(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1091,9 +1114,12 @@ def test_lora_fuse_nan(self): _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1135,9 +1161,12 @@ def test_lora_fusion(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1168,9 +1197,12 @@ def test_unfuse_lora(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1213,9 +1245,12 @@ def test_lora_fusion_is_not_affected_by_unloading(self): _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1252,9 +1287,12 @@ def test_fuse_lora_with_different_scales(self): _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1304,9 +1342,12 @@ def test_with_different_scales(self): original_imagee_slice = original_images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1352,9 +1393,12 @@ def test_with_different_scales_fusion_equivalence(self): images_slice = images[0, -3:, -3:, -1] # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True, var=0.1) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True, var=0.1 + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True, var=0.1 + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1399,9 +1443,12 @@ def test_save_load_fused_lora_modules(self): _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True, var=0.1) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True, var=0.1 + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True, var=0.1 + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1482,7 +1529,7 @@ def test_lora_processors(self): with torch.no_grad(): sample1 = model(**inputs_dict).sample - lora_params = create_unet_lora_layers(model) + _, lora_params = create_unet_lora_layers(model) # make sure we can set a list of attention processors model.load_attn_procs(lora_params) @@ -1515,7 +1562,7 @@ def test_lora_on_off(self, expected_max_diff=1e-3): with torch.no_grad(): old_sample = model(**inputs_dict).sample - lora_params = create_unet_lora_layers(model) + _, lora_params = create_unet_lora_layers(model) model.load_attn_procs(lora_params) with torch.no_grad(): @@ -1545,7 +1592,7 @@ def test_lora_xformers_on_off(self, expected_max_diff=6e-4): torch.manual_seed(0) model = self.model_class(**init_dict) model.to(torch_device) - lora_params = create_unet_lora_layers(model) + _, lora_params = create_unet_lora_layers(model) model.load_attn_procs(lora_params) # default From 03bb29cc958ba20d80953d0f69ad11a6a9c4f100 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:24:50 +0530 Subject: [PATCH 30/44] fix --- tests/lora/test_lora_layers_old_backend.py | 121 ++++++--------------- 1 file changed, 32 insertions(+), 89 deletions(-) diff --git 
a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 6c6d143382c1..8750bfe306b5 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -228,7 +228,10 @@ def set_lora_weights(lora_attn_parameters, randn_weight=False, var=1.0): else: torch.zero_(parameter) else: - modified_state_dict = {k: torch.rand_like(v) * var for k, v in lora_attn_parameters.items()} + if randn_weight: + modified_state_dict = {k: torch.rand_like(v) * var for k, v in lora_attn_parameters.items()} + else: + modified_state_dict = {k: torch.zeros_like(v) * var for k, v in lora_attn_parameters.items()} return modified_state_dict @@ -295,7 +298,9 @@ def get_dummy_components(self): text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder) + text_encoder_lora_params = set_lora_weights( + text_encoder_lora_state_dict(text_encoder), randn_weight=True, var=0.1 + ) pipeline_components = { "unet": unet, @@ -469,13 +474,15 @@ def test_text_encoder_lora_monkey_patch(self): assert outputs_without_lora.shape == (1, 77, 32) # monkey patch - params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) - set_lora_weights(params, randn_weight=False) + text_encoder_lora_params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) + text_encoder_lora_params = set_lora_weights( + text_encoder_lora_state_dict(pipe.text_encoder), randn_weight=False + ) with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, unet_lora_layers=None, - text_encoder_lora_layers=params, + text_encoder_lora_layers=text_encoder_lora_params, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) pipe.load_lora_weights(tmpdirname) @@ -492,13 +499,15 @@ def test_text_encoder_lora_monkey_patch(self): pipeline_components, _ = self.get_dummy_components() pipe = StableDiffusionPipeline(**pipeline_components) - params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) - set_lora_weights(params, randn_weight=True) + text_encoder_lora_params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) + text_encoder_lora_params = set_lora_weights( + text_encoder_lora_state_dict(pipe.text_encoder), randn_weight=True, var=0.1 + ) with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, unet_lora_layers=None, - text_encoder_lora_layers=params, + text_encoder_lora_layers=text_encoder_lora_params, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) pipe.load_lora_weights(tmpdirname) @@ -523,7 +532,7 @@ def test_text_encoder_lora_remove_monkey_patch(self): # monkey patch params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) - set_lora_weights(params, randn_weight=True) + params = set_lora_weights(text_encoder_lora_state_dict(pipe.text_encoder), var=0.1, randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, @@ -615,7 +624,9 @@ def test_unload_lora_sd(self): # Emulate training. 
lora_components["text_encoder_lora_layers"] = set_lora_weights( - lora_components["text_encoder_lora_layers"], randn_weight=True + lora_components["text_encoder_lora_layers"], + randn_weight=True, + var=0.1, ) with tempfile.TemporaryDirectory() as tmpdirname: @@ -714,7 +725,7 @@ def test_lora_save_load_with_xformers(self): @deprecate_after_peft_backend -class SDXInpaintLoraMixinTests(unittest.TestCase): +class SDInpaintLoraMixinTests(unittest.TestCase): lora_rank = 4 def get_dummy_inputs(self, device, seed=0, img_res=64, output_pil=True): @@ -791,10 +802,12 @@ def get_dummy_components(self): tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") unet_lora_raw_params, unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) - text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( + text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder) + text_encoder_lora_params = set_lora_weights( + text_encoder_lora_state_dict(text_encoder), randn_weight=True, var=0.1 + ) components = { "unet": unet, @@ -912,11 +925,15 @@ def get_dummy_components(self): text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder) + text_encoder_lora_params = set_lora_weights( + text_encoder_lora_state_dict(text_encoder), randn_weight=True, var=0.1 + ) text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder_2, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_two_lora_params = text_encoder_lora_state_dict(text_encoder_2) + text_encoder_two_lora_params = set_lora_weights( + text_encoder_lora_state_dict(text_encoder_2), randn_weight=True, var=0.1 + ) pipeline_components = { "unet": unet, @@ -992,14 +1009,6 @@ def test_unload_lora_sdxl(self): original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1111,16 +1120,6 @@ def test_lora_fuse_nan(self): sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) - _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) - - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1160,14 +1159,6 @@ def test_lora_fusion(self): original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] - # Emulate training. 
- lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1196,14 +1187,6 @@ def test_unfuse_lora(self): original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1244,14 +1227,6 @@ def test_lora_fusion_is_not_affected_by_unloading(self): _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1286,14 +1261,6 @@ def test_fuse_lora_with_different_scales(self): _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1341,14 +1308,6 @@ def test_with_different_scales(self): original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images original_imagee_slice = original_images[0, -3:, -3:, -1] - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1392,14 +1351,6 @@ def test_with_different_scales_fusion_equivalence(self): images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images images_slice = images[0, -3:, -3:, -1] - # Emulate training. 
- lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True, var=0.1 - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True, var=0.1 - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1442,14 +1393,6 @@ def test_save_load_fused_lora_modules(self): _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True, var=0.1 - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True, var=0.1 - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, From f49185edf731c7a28eb38538d8d239278715fd7f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:28:47 +0530 Subject: [PATCH 31/44] fix: test_unload_lora_sd --- tests/lora/test_lora_layers_old_backend.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 8750bfe306b5..d45722ee526f 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -622,13 +622,6 @@ def test_unload_lora_sd(self): original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] - # Emulate training. - lora_components["text_encoder_lora_layers"] = set_lora_weights( - lora_components["text_encoder_lora_layers"], - randn_weight=True, - var=0.1, - ) - with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, From b05585d02324974a01b2556fd36891606e3f31ee Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:29:16 +0530 Subject: [PATCH 32/44] fix: test_unload_lora_sd --- tests/lora/test_lora_layers_old_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index d45722ee526f..0ae054f8679b 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -626,7 +626,7 @@ def test_unload_lora_sd(self): LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, unet_lora_layers=lora_components["unet_lora_params"], - text_encoder_lora_layers=lora_components["text_encoder_lora_layers"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(tmpdirname) From 451816bee63e61af0d736789887093a4c389152d Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:31:06 +0530 Subject: [PATCH 33/44] use dfault attebtion processors. 
--- tests/lora/test_lora_layers_old_backend.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 0ae054f8679b..8253752fabfe 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -618,6 +618,7 @@ def test_unload_lora_sd(self): pipeline_components, lora_components = self.get_dummy_components() _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) sd_pipe = StableDiffusionPipeline(**pipeline_components) + sd_pipe.unet.set_default_attn_processor() original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] @@ -998,6 +999,7 @@ def test_unload_lora_sdxl(self): pipeline_components, lora_components = self.get_dummy_components() _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) sd_pipe = StableDiffusionXLPipeline(**pipeline_components) + sd_pipe.unet.set_default_attn_processor() original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] From dc085bf1b726a25b28456a46dd39c49d4db3f292 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:37:44 +0530 Subject: [PATCH 34/44] debu --- tests/lora/test_lora_layers_old_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 8253752fabfe..2ce176f6e07f 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1128,6 +1128,7 @@ def test_lora_fuse_nan(self): # corrupt one LoRA weight with `inf` values with torch.no_grad(): + print(sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight.data[0, :3]) sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight += float( "inf" ) From 7d9cbaa851fcd5e4bf7894e6cdaaecb8f36802af Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:38:22 +0530 Subject: [PATCH 35/44] debug nan --- tests/lora/test_lora_layers_old_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 2ce176f6e07f..5d6d14d8cc97 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1132,6 +1132,7 @@ def test_lora_fuse_nan(self): sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight += float( "inf" ) + print(sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight.data[0, :3]) # with `safe_fusing=True` we should see an Error with self.assertRaises(ValueError): From d15af62ad05a73f06e237202d82aae989e4be70f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:40:09 +0530 Subject: [PATCH 36/44] debug nan --- src/diffusers/models/lora.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index daac8f902cd6..bebcfaa9d24c 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -308,6 +308,7 @@ def _fuse_lora(self, lora_scale: float = 1.0, safe_fusing: bool = False): w_orig = self.weight.data.float() w_up = self.lora_layer.up.weight.data.float() w_down = self.lora_layer.down.weight.data.float() + print(w_down[0, :3], torch.isnan(w_down).any().item()) if 
self.lora_layer.network_alpha is not None: w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank From a7b1606d66df5d647bfde1563a05aaac852171ba Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:41:00 +0530 Subject: [PATCH 37/44] debug nan --- src/diffusers/models/lora.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index bebcfaa9d24c..fed78b075205 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -308,7 +308,6 @@ def _fuse_lora(self, lora_scale: float = 1.0, safe_fusing: bool = False): w_orig = self.weight.data.float() w_up = self.lora_layer.up.weight.data.float() w_down = self.lora_layer.down.weight.data.float() - print(w_down[0, :3], torch.isnan(w_down).any().item()) if self.lora_layer.network_alpha is not None: w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank @@ -388,6 +387,8 @@ def _fuse_lora(self, lora_scale: float = 1.0, safe_fusing: bool = False): w_up = self.lora_layer.up.weight.data.float() w_down = self.lora_layer.down.weight.data.float() + print(w_down[0, :3], torch.isnan(w_down).any().item()) + if self.lora_layer.network_alpha is not None: w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank From b507b403ebb0cc16934844efd09778353c143a36 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:44:37 +0530 Subject: [PATCH 38/44] use NaN instead of inf --- tests/lora/test_lora_layers_old_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 5d6d14d8cc97..b7c71a70cc64 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1130,7 +1130,7 @@ def test_lora_fuse_nan(self): with torch.no_grad(): print(sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight.data[0, :3]) sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight += float( - "inf" + "NaN" ) print(sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight.data[0, :3]) From 67df56fe8d4f0f3a5bfac8e0c2d17dcefd7738ac Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:45:13 +0530 Subject: [PATCH 39/44] remove comments. 
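For context, a minimal sketch of the behaviour those debug prints were chasing in `test_lora_fuse_nan` (assuming `sd_pipe` is an SDXL pipeline that already has the LoRA layers attached, as in the test):

```python
import torch

# Corrupt one LoRA down-projection with NaN, exactly as the test does.
with torch.no_grad():
    attn = sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1
    attn.to_q.lora_layer.down.weight += float("NaN")

# With safe_fusing=True, fusing is expected to detect the corrupted weight and raise.
try:
    sd_pipe.fuse_lora(safe_fusing=True)
except ValueError:
    print("NaN detected during fusion, as expected")

# Without the safety check, fusion proceeds and the NaN propagates into the UNet.
sd_pipe.fuse_lora(safe_fusing=False)
```

This is also why the previous commit switched the corruption from `inf` to `NaN`: the safe-fusing path appears to check the fused weight for NaNs, which an added `inf` does not reliably produce.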
--- src/diffusers/models/lora.py | 2 -- tests/lora/test_lora_layers_old_backend.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index fed78b075205..daac8f902cd6 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -387,8 +387,6 @@ def _fuse_lora(self, lora_scale: float = 1.0, safe_fusing: bool = False): w_up = self.lora_layer.up.weight.data.float() w_down = self.lora_layer.down.weight.data.float() - print(w_down[0, :3], torch.isnan(w_down).any().item()) - if self.lora_layer.network_alpha is not None: w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index b7c71a70cc64..7b847c745a4b 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1128,11 +1128,9 @@ def test_lora_fuse_nan(self): # corrupt one LoRA weight with `inf` values with torch.no_grad(): - print(sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight.data[0, :3]) sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight += float( "NaN" ) - print(sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight.data[0, :3]) # with `safe_fusing=True` we should see an Error with self.assertRaises(ValueError): From ccda99293b9bff2380143a5d413688517efcd4d7 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 13:22:18 +0530 Subject: [PATCH 40/44] fix: test_text_encoder_lora_state_dict_unchanged --- tests/lora/test_lora_layers_old_backend.py | 47 +++++++++++++++------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 7b847c745a4b..3709a0e045ce 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -858,7 +858,7 @@ def test_stable_diffusion_inpaint_lora(self): class SDXLLoraLoaderMixinTests(unittest.TestCase): lora_rank = 4 - def get_dummy_components(self): + def get_dummy_components(self, modify_text_encoder=True): torch.manual_seed(0) unet = UNet2DConditionModel( block_out_channels=(32, 64), @@ -916,18 +916,23 @@ def get_dummy_components(self): tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") _, unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) - text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( - text_encoder, dtype=torch.float32, rank=self.lora_rank - ) - text_encoder_lora_params = set_lora_weights( - text_encoder_lora_state_dict(text_encoder), randn_weight=True, var=0.1 - ) - text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( - text_encoder_2, dtype=torch.float32, rank=self.lora_rank - ) - text_encoder_two_lora_params = set_lora_weights( - text_encoder_lora_state_dict(text_encoder_2), randn_weight=True, var=0.1 - ) + + if modify_text_encoder: + text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( + text_encoder, dtype=torch.float32, rank=self.lora_rank + ) + text_encoder_lora_params = set_lora_weights( + text_encoder_lora_state_dict(text_encoder), randn_weight=True, var=0.1 + ) + text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( + text_encoder_2, dtype=torch.float32, rank=self.lora_rank + ) + text_encoder_two_lora_params 
= set_lora_weights( + text_encoder_lora_state_dict(text_encoder_2), randn_weight=True, var=0.1 + ) + else: + text_encoder_lora_params = None + text_encoder_two_lora_params = None pipeline_components = { "unet": unet, @@ -1052,7 +1057,7 @@ def test_load_lora_locally(self): sd_pipe.unload_lora_weights() def test_text_encoder_lora_state_dict_unchanged(self): - pipeline_components, lora_components = self.get_dummy_components() + pipeline_components, lora_components = self.get_dummy_components(modify_text_encoder=False) sd_pipe = StableDiffusionXLPipeline(**pipeline_components) text_encoder_1_sd_keys = sorted(sd_pipe.text_encoder.state_dict().keys()) @@ -1061,6 +1066,20 @@ def test_text_encoder_lora_state_dict_unchanged(self): sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) + # Modify the text encoder. + _ = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( + sd_pipe.text_encoder, dtype=torch.float32, rank=self.lora_rank + ) + lora_components["text_encoder_lora_params"] = set_lora_weights( + text_encoder_lora_state_dict(sd_pipe.text_encoder), randn_weight=True, var=0.1 + ) + _ = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( + sd_pipe.text_encoder_2, dtype=torch.float32, rank=self.lora_rank + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + text_encoder_lora_state_dict(sd_pipe.text_encoder_2), randn_weight=True, var=0.1 + ) + with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, From bd995dfd76b6d88e3f26b708cc22cb57a97aff66 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 13:23:29 +0530 Subject: [PATCH 41/44] attention processor default --- tests/lora/test_lora_layers_old_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 3709a0e045ce..182bd718280a 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1195,6 +1195,7 @@ def test_unfuse_lora(self): sd_pipe = StableDiffusionXLPipeline(**pipeline_components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) + sd_pipe.unet.set_default_attn_processor() _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) From e80acf3cc8f1eb9a2e9243e3b32ff3b5afef67a0 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 13:26:23 +0530 Subject: [PATCH 42/44] default attention processors. 
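For readability, here is the contract of the `set_lora_weights` helper that the hunks above rely on: when handed an iterable of LoRA parameters it mutates them in place, but when handed a LoRA state dict it returns a new, randomized dict that must be reassigned. A hedged usage sketch of the test file's own helpers (`unet` and `text_encoder` stand for the dummy models built in `get_dummy_components`):

```python
# In-place branch: raw LoRA parameters are randomized (or zeroed) directly.
unet_lora_raw_params, unet_lora_params = create_unet_lora_layers(unet, rank=4)
set_lora_weights(unet_lora_raw_params, randn_weight=True, var=0.1)

# State-dict branch: a fresh dict of randomized tensors is returned, which is why the
# tests write the result back into `lora_components` instead of mutating in place.
text_encoder_lora_params = set_lora_weights(
    text_encoder_lora_state_dict(text_encoder), randn_weight=True, var=0.1
)
```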
--- tests/lora/test_lora_layers_old_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 182bd718280a..13c539dba936 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1318,6 +1318,7 @@ def test_with_different_scales(self): sd_pipe = StableDiffusionXLPipeline(**pipeline_components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) + sd_pipe.unet.set_default_attn_processor() _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images From 046a8b3174d3ff77dfd488a7aaad647120e7d442 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 13:31:21 +0530 Subject: [PATCH 43/44] default --- tests/lora/test_lora_layers_old_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 13c539dba936..6b2822d6926e 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1361,6 +1361,7 @@ def test_with_different_scales_fusion_equivalence(self): sd_pipe = StableDiffusionXLPipeline(**pipeline_components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) + sd_pipe.unet.set_default_attn_processor() _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) From 6f2dda30fa3bae6869d4f6f42a65d6e08d35053f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 13:38:07 +0530 Subject: [PATCH 44/44] style --- tests/lora/test_lora_layers_old_backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 6b2822d6926e..7d6d30169455 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -916,7 +916,7 @@ def get_dummy_components(self, modify_text_encoder=True): tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") _, unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) - + if modify_text_encoder: text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank @@ -931,7 +931,7 @@ def get_dummy_components(self, modify_text_encoder=True): text_encoder_lora_state_dict(text_encoder_2), randn_weight=True, var=0.1 ) else: - text_encoder_lora_params = None + text_encoder_lora_params = None text_encoder_two_lora_params = None pipeline_components = {