From cf2c0ba83fb5b8841b8b8a6872da33aa6c878b67 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 22 Sep 2023 16:45:26 +0000 Subject: [PATCH 001/134] v1 --- src/diffusers/loaders.py | 71 ++++++++++++++----- src/diffusers/models/__init__.py | 4 +- src/diffusers/models/attention.py | 18 +++-- src/diffusers/models/attention_processor.py | 32 ++++++--- src/diffusers/models/constants.py | 32 +++++++++ src/diffusers/models/embeddings.py | 6 +- src/diffusers/models/lora.py | 8 +-- src/diffusers/models/resnet.py | 50 +++++++++---- src/diffusers/models/transformer_2d.py | 36 +++++++--- .../pipeline_stable_diffusion.py | 21 +++--- .../pipeline_stable_diffusion_xl.py | 25 +++++-- src/diffusers/utils/__init__.py | 8 ++- src/diffusers/utils/peft_utils.py | 18 +++++ src/diffusers/utils/state_dict_utils.py | 25 ++++++- 14 files changed, 274 insertions(+), 80 deletions(-) create mode 100644 src/diffusers/models/constants.py diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index bc40cf9a18ea..ad88d7ba686a 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import importlib import os import re from collections import defaultdict @@ -24,9 +23,9 @@ import safetensors import torch from huggingface_hub import hf_hub_download, model_info -from packaging import version from torch import nn +from .models import USE_PEFT_BACKEND from .models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta from .utils import ( DIFFUSERS_CACHE, @@ -34,10 +33,10 @@ _get_model_file, convert_state_dict_to_diffusers, convert_state_dict_to_peft, + convert_unet_state_dict_to_peft, deprecate, is_accelerate_available, is_omegaconf_available, - is_peft_available, is_transformers_available, logging, recurse_remove_peft_layers, @@ -66,19 +65,6 @@ CUSTOM_DIFFUSION_WEIGHT_NAME = "pytorch_custom_diffusion_weights.bin" CUSTOM_DIFFUSION_WEIGHT_NAME_SAFE = "pytorch_custom_diffusion_weights.safetensors" - -# Below should be `True` if the current version of `peft` and `transformers` are compatible with -# PEFT backend. Will automatically fall back to PEFT backend if the correct versions of the libraries are -# available. -# For PEFT it is has to be greater than 0.6.0 and for transformers it has to be greater than 4.33.1. -_required_peft_version = is_peft_available() and version.parse( - version.parse(importlib.metadata.version("peft")).base_version -) > version.parse("0.5") -_required_transformers_version = version.parse( - version.parse(importlib.metadata.version("transformers")).base_version -) > version.parse("4.33") - -USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version LORA_DEPRECATION_MESSAGE = "You are using an old version of LoRA backend. This will be deprecated in the next releases in favor of PEFT make sure to install the latest PEFT and transformers packages in the future." 
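The gate removed from `loaders.py` above is not gone for good: it reappears in a new `src/diffusers/models/constants.py` later in this patch, and the model files then branch on it when picking their layer classes. A minimal sketch of that pattern, mirroring the hunks that follow (the layer sizes are illustrative, not taken from the patch):

    import torch.nn as nn

    from diffusers.models import USE_PEFT_BACKEND
    from diffusers.models.lora import LoRACompatibleLinear

    # When the PEFT backend is active, PEFT injects and scales the LoRA
    # weights itself, so a plain nn.Linear suffices; otherwise keep the
    # legacy LoRA-aware layer whose forward() accepts a `scale` argument.
    linear_cls = nn.Linear if USE_PEFT_BACKEND else LoRACompatibleLinear
    proj = linear_cls(320, 320)
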
@@ -255,6 +241,7 @@ def map_from(module, state_dict, *args, **kwargs): class UNet2DConditionLoadersMixin: text_encoder_name = TEXT_ENCODER_NAME unet_name = UNET_NAME + use_peft_backend = USE_PEFT_BACKEND def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs): r""" @@ -399,7 +386,7 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict # fill attn processors lora_layers_list = [] - is_lora = all(("lora" in k or k.endswith(".alpha")) for k in state_dict.keys()) + is_lora = all(("lora" in k or k.endswith(".alpha")) for k in state_dict.keys()) and not self.use_peft_backend is_custom_diffusion = any("custom_diffusion" in k for k in state_dict.keys()) if is_lora: @@ -513,6 +500,8 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict cross_attention_dim=cross_attention_dim, ) attn_processors[key].load_state_dict(value_dict) + elif self.use_peft_backend: + pass else: raise ValueError( f"{model_file} does not seem to be in the correct format expected by LoRA or Custom Diffusion training." @@ -1443,7 +1432,9 @@ def _maybe_map_sgm_blocks_to_diffusers(cls, state_dict, unet_config, delimiter=" return new_state_dict @classmethod - def load_lora_into_unet(cls, state_dict, network_alphas, unet, low_cpu_mem_usage=None, _pipeline=None): + def load_lora_into_unet( + cls, state_dict, network_alphas, unet, low_cpu_mem_usage=None, _pipeline=None, adapter_name="default" + ): """ This will load the LoRA layers specified in `state_dict` into `unet`. @@ -1461,6 +1452,8 @@ def load_lora_into_unet(cls, state_dict, network_alphas, unet, low_cpu_mem_usage tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this argument to `True` will raise an error. + adapter_name (`str`, *optional*): + The name of the adapter to load the weights into. By default we use `"default"` """ low_cpu_mem_usage = low_cpu_mem_usage if low_cpu_mem_usage is not None else _LOW_CPU_MEM_USAGE_DEFAULT # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), @@ -1487,6 +1480,48 @@ def load_lora_into_unet(cls, state_dict, network_alphas, unet, low_cpu_mem_usage warn_message = "You have saved the LoRA weights using the old format. To convert the old LoRA weights to the new format, you can first load them in a dictionary and then create a new dictionary like the following: `new_state_dict = {f'unet.{module_name}': params for module_name, params in old_state_dict.items()}`." 
logger.warn(warn_message)

+        if cls.use_peft_backend:
+            from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict
+
+            state_dict = convert_unet_state_dict_to_peft(state_dict)
+            target_modules = []
+            ranks = []
+            for key in state_dict.keys():
+                # strip the trailing `lora_A/lora_B.weight` to recover the target module name
+                filtered_name = ".".join(key.split(".")[:-2])
+                target_modules.append(filtered_name)
+                if "lora_B" in key:
+                    rank = state_dict[key].shape[1]
+                    ranks.append(rank)
+
+            current_rank = ranks[0]
+            if not all(rank == current_rank for rank in ranks):
+                raise ValueError("Multi-rank not supported yet")
+
+            # TODO: support multi-alpha
+            alpha = current_rank
+
+            lora_config = LoraConfig(
+                r=current_rank,
+                lora_alpha=alpha,
+                target_modules=target_modules,
+            )
+
+            inject_adapter_in_model(lora_config, unet, adapter_name=adapter_name)
+
+            incompatible_keys = set_peft_model_state_dict(unet, state_dict)
+
+            if incompatible_keys is not None:
+                # check only for unexpected keys
+                if hasattr(incompatible_keys, "unexpected_keys") and len(incompatible_keys.unexpected_keys) > 0:
+                    logger.warning(
+                        f"Loading adapter weights from state_dict led to unexpected keys not found in the model: "
+                        f" {incompatible_keys.unexpected_keys}. "
+                    )
+                elif hasattr(incompatible_keys, "unexpected_keys") and len(incompatible_keys.unexpected_keys) == 0:
+                    # At this point all LoRA layers have been loaded, so we reset `state_dict` to an empty dict
+                    state_dict = {}
+
         unet.load_attn_procs(
             state_dict, network_alphas=network_alphas, low_cpu_mem_usage=low_cpu_mem_usage, _pipeline=_pipeline
         )
diff --git a/src/diffusers/models/__init__.py b/src/diffusers/models/__init__.py
index 75ddb21fb15d..c05410663f60 100644
--- a/src/diffusers/models/__init__.py
+++ b/src/diffusers/models/__init__.py
@@ -17,7 +17,7 @@
 from ..utils import _LazyModule, is_flax_available, is_torch_available


-_import_structure = {}
+_import_structure = {"constants": ["USE_PEFT_BACKEND"]}

 if is_torch_available():
     _import_structure["adapter"] = ["MultiAdapter", "T2IAdapter"]
@@ -44,6 +44,8 @@


 if TYPE_CHECKING:
+    from .constants import USE_PEFT_BACKEND
+
     if is_torch_available():
         from .adapter import MultiAdapter, T2IAdapter
         from .autoencoder_asym_kl import AsymmetricAutoencoderKL
diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py
index 892d44a03137..b45cb6a0717e 100644
--- a/src/diffusers/models/attention.py
+++ b/src/diffusers/models/attention.py
@@ -20,6 +20,7 @@
 from ..utils.torch_utils import maybe_allow_in_graph
 from .activations import get_activation
 from .attention_processor import Attention
+from .constants import USE_PEFT_BACKEND
 from .embeddings import CombinedTimestepLabelEmbeddings
 from .lora import LoRACompatibleLinear


@@ -280,6 +281,7 @@ def __init__(
         super().__init__()
         inner_dim = int(dim * mult)
         dim_out = dim_out if dim_out is not None else dim
+        linear_cls = LoRACompatibleLinear if not USE_PEFT_BACKEND else nn.Linear

         if activation_fn == "gelu":
             act_fn = GELU(dim, inner_dim)
@@ -296,14 +298,16 @@ def __init__(
         # project dropout
         self.net.append(nn.Dropout(dropout))
         # project out
-        self.net.append(LoRACompatibleLinear(inner_dim, dim_out))
+        self.net.append(linear_cls(inner_dim, dim_out))
         # FF as used in Vision Transformer, MLP-Mixer, etc.
have a final dropout if final_dropout: self.net.append(nn.Dropout(dropout)) def forward(self, hidden_states, scale: float = 1.0): + compatible_cls = (GEGLU,) if USE_PEFT_BACKEND else (GEGLU, LoRACompatibleLinear) + for module in self.net: - if isinstance(module, (LoRACompatibleLinear, GEGLU)): + if isinstance(module, compatible_cls): hidden_states = module(hidden_states, scale) else: hidden_states = module(hidden_states) @@ -343,7 +347,9 @@ class GEGLU(nn.Module): def __init__(self, dim_in: int, dim_out: int): super().__init__() - self.proj = LoRACompatibleLinear(dim_in, dim_out * 2) + linear_cls = LoRACompatibleLinear if not USE_PEFT_BACKEND else nn.Linear + + self.proj = linear_cls(dim_in, dim_out * 2) def gelu(self, gate): if gate.device.type != "mps": @@ -352,7 +358,11 @@ def gelu(self, gate): return F.gelu(gate.to(dtype=torch.float32)).to(dtype=gate.dtype) def forward(self, hidden_states, scale: float = 1.0): - hidden_states, gate = self.proj(hidden_states, scale).chunk(2, dim=-1) + hidden_states, gate = ( + self.proj(hidden_states, scale).chunk(2, dim=-1) + if not USE_PEFT_BACKEND + else self.proj(hidden_states).chunk(2, dim=-1) + ) return hidden_states * self.gelu(gate) diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py index fba5bddb5def..cd2f2f2ab7f3 100644 --- a/src/diffusers/models/attention_processor.py +++ b/src/diffusers/models/attention_processor.py @@ -21,6 +21,7 @@ from ..utils import deprecate, logging from ..utils.import_utils import is_xformers_available from ..utils.torch_utils import maybe_allow_in_graph +from .constants import USE_PEFT_BACKEND from .lora import LoRACompatibleLinear, LoRALinearLayer @@ -137,22 +138,27 @@ def __init__( f"unknown cross_attention_norm: {cross_attention_norm}. 
Should be None, 'layer_norm' or 'group_norm'" ) - self.to_q = LoRACompatibleLinear(query_dim, self.inner_dim, bias=bias) + if USE_PEFT_BACKEND: + linear_cls = nn.Linear + else: + linear_cls = LoRACompatibleLinear + + self.to_q = linear_cls(query_dim, self.inner_dim, bias=bias) if not self.only_cross_attention: # only relevant for the `AddedKVProcessor` classes - self.to_k = LoRACompatibleLinear(self.cross_attention_dim, self.inner_dim, bias=bias) - self.to_v = LoRACompatibleLinear(self.cross_attention_dim, self.inner_dim, bias=bias) + self.to_k = linear_cls(self.cross_attention_dim, self.inner_dim, bias=bias) + self.to_v = linear_cls(self.cross_attention_dim, self.inner_dim, bias=bias) else: self.to_k = None self.to_v = None if self.added_kv_proj_dim is not None: - self.add_k_proj = LoRACompatibleLinear(added_kv_proj_dim, self.inner_dim) - self.add_v_proj = LoRACompatibleLinear(added_kv_proj_dim, self.inner_dim) + self.add_k_proj = linear_cls(added_kv_proj_dim, self.inner_dim) + self.add_v_proj = linear_cls(added_kv_proj_dim, self.inner_dim) self.to_out = nn.ModuleList([]) - self.to_out.append(LoRACompatibleLinear(self.inner_dim, query_dim, bias=out_bias)) + self.to_out.append(linear_cls(self.inner_dim, query_dim, bias=out_bias)) self.to_out.append(nn.Dropout(dropout)) # set attention processor @@ -1010,15 +1016,19 @@ def __call__( if attn.group_norm is not None: hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) - query = attn.to_q(hidden_states, scale=scale) + query = attn.to_q(hidden_states, scale=scale) if not USE_PEFT_BACKEND else attn.to_q(hidden_states) if encoder_hidden_states is None: encoder_hidden_states = hidden_states elif attn.norm_cross: encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) - key = attn.to_k(encoder_hidden_states, scale=scale) - value = attn.to_v(encoder_hidden_states, scale=scale) + key = ( + attn.to_k(encoder_hidden_states, scale=scale) if not USE_PEFT_BACKEND else attn.to_k(encoder_hidden_states) + ) + value = ( + attn.to_v(encoder_hidden_states, scale=scale) if not USE_PEFT_BACKEND else attn.to_v(encoder_hidden_states) + ) inner_dim = key.shape[-1] head_dim = inner_dim // attn.heads @@ -1038,7 +1048,9 @@ def __call__( hidden_states = hidden_states.to(query.dtype) # linear proj - hidden_states = attn.to_out[0](hidden_states, scale=scale) + hidden_states = ( + attn.to_out[0](hidden_states, scale=scale) if not USE_PEFT_BACKEND else attn.to_out[0](hidden_states) + ) # dropout hidden_states = attn.to_out[1](hidden_states) diff --git a/src/diffusers/models/constants.py b/src/diffusers/models/constants.py new file mode 100644 index 000000000000..890b47cfc21e --- /dev/null +++ b/src/diffusers/models/constants.py @@ -0,0 +1,32 @@ +# Copyright 2023 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import importlib
+
+from packaging import version
+
+from ..utils.import_utils import is_peft_available
+
+
+# Below should be `True` if the current versions of `peft` and `transformers` are compatible with
+# the PEFT backend. The PEFT backend is automatically used if compatible versions of the libraries
+# are available.
+# For PEFT it has to be greater than 0.6.0 and for transformers it has to be greater than 4.33.1.
+_required_peft_version = is_peft_available() and version.parse(
+    version.parse(importlib.metadata.version("peft")).base_version
+) > version.parse("0.5")
+_required_transformers_version = version.parse(
+    version.parse(importlib.metadata.version("transformers")).base_version
+) > version.parse("4.33")
+
+USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version
diff --git a/src/diffusers/models/embeddings.py b/src/diffusers/models/embeddings.py
index e05092de3d10..614ce5ed40bb 100644
--- a/src/diffusers/models/embeddings.py
+++ b/src/diffusers/models/embeddings.py
@@ -19,6 +19,7 @@
 from torch import nn

 from .activations import get_activation
+from .constants import USE_PEFT_BACKEND
 from .lora import LoRACompatibleLinear


@@ -166,8 +167,9 @@ def __init__(
         cond_proj_dim=None,
     ):
         super().__init__()
+        linear_cls = nn.Linear if USE_PEFT_BACKEND else LoRACompatibleLinear

-        self.linear_1 = LoRACompatibleLinear(in_channels, time_embed_dim)
+        self.linear_1 = linear_cls(in_channels, time_embed_dim)

         if cond_proj_dim is not None:
             self.cond_proj = nn.Linear(cond_proj_dim, in_channels, bias=False)
@@ -180,7 +182,7 @@ def __init__(
             time_embed_dim_out = out_dim
         else:
             time_embed_dim_out = time_embed_dim
-        self.linear_2 = LoRACompatibleLinear(time_embed_dim, time_embed_dim_out)
+        self.linear_2 = linear_cls(time_embed_dim, time_embed_dim_out)

         if post_act_fn is None:
             self.post_act = None
diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py
index 07eeae712f71..68481dda5aae 100644
--- a/src/diffusers/models/lora.py
+++ b/src/diffusers/models/lora.py
@@ -26,13 +26,7 @@


 def adjust_lora_scale_text_encoder(text_encoder, lora_scale: float = 1.0, use_peft_backend: bool = False):
-    if use_peft_backend:
-        from peft.tuners.lora import LoraLayer
-
-        for module in text_encoder.modules():
-            if isinstance(module, LoraLayer):
-                module.scaling[module.active_adapter] = lora_scale
-    else:
+    if not use_peft_backend:
         for _, attn_module in text_encoder_attn_modules(text_encoder):
             if isinstance(attn_module.q_proj, PatchedLoraProjection):
                 attn_module.q_proj.lora_scale = lora_scale
diff --git a/src/diffusers/models/resnet.py b/src/diffusers/models/resnet.py
index ac66e2271c61..be80157b93b6 100644
--- a/src/diffusers/models/resnet.py
+++ b/src/diffusers/models/resnet.py
@@ -23,6 +23,7 @@
 from .activations import get_activation
 from .attention import AdaGroupNorm
 from .attention_processor import SpatialNorm
+from .constants import USE_PEFT_BACKEND
 from .lora import LoRACompatibleConv, LoRACompatibleLinear


@@ -122,12 +123,13 @@ def __init__(self, channels, use_conv=False, use_conv_transpose=False, out_chann
         self.use_conv = use_conv
         self.use_conv_transpose = use_conv_transpose
         self.name = name
+        conv_cls = nn.Conv2d if USE_PEFT_BACKEND else LoRACompatibleConv

         conv = None
         if use_conv_transpose:
             conv = nn.ConvTranspose2d(channels, self.out_channels, 4, 2, 1)
         elif use_conv:
-            conv = LoRACompatibleConv(self.channels, self.out_channels, 3, padding=1)
+            conv = conv_cls(self.channels, self.out_channels, 3, padding=1)

         # TODO(Suraj, Patrick) - clean up after weight dicts are correctly
renamed if name == "conv": @@ -164,7 +166,7 @@ def forward(self, hidden_states, output_size=None, scale: float = 1.0): hidden_states = hidden_states.to(dtype) # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed - if self.use_conv: + if self.use_conv and not USE_PEFT_BACKEND: if self.name == "conv": if isinstance(self.conv, LoRACompatibleConv): hidden_states = self.conv(hidden_states, scale) @@ -175,6 +177,11 @@ def forward(self, hidden_states, output_size=None, scale: float = 1.0): hidden_states = self.Conv2d_0(hidden_states, scale) else: hidden_states = self.Conv2d_0(hidden_states) + else: + if self.name == "conv": + hidden_states = self.conv(hidden_states) + else: + hidden_states = self.Conv2d_0(hidden_states) return hidden_states @@ -201,9 +208,10 @@ def __init__(self, channels, use_conv=False, out_channels=None, padding=1, name= self.padding = padding stride = 2 self.name = name + conv_cls = nn.Conv2d if USE_PEFT_BACKEND else LoRACompatibleConv if use_conv: - conv = LoRACompatibleConv(self.channels, self.out_channels, 3, stride=stride, padding=padding) + conv = conv_cls(self.channels, self.out_channels, 3, stride=stride, padding=padding) else: assert self.channels == self.out_channels conv = nn.AvgPool2d(kernel_size=stride, stride=stride) @@ -219,13 +227,18 @@ def __init__(self, channels, use_conv=False, out_channels=None, padding=1, name= def forward(self, hidden_states, scale: float = 1.0): assert hidden_states.shape[1] == self.channels + if self.use_conv and self.padding == 0: pad = (0, 1, 0, 1) hidden_states = F.pad(hidden_states, pad, mode="constant", value=0) assert hidden_states.shape[1] == self.channels - if isinstance(self.conv, LoRACompatibleConv): - hidden_states = self.conv(hidden_states, scale) + + if not USE_PEFT_BACKEND: + if isinstance(self.conv, LoRACompatibleConv): + hidden_states = self.conv(hidden_states, scale) + else: + hidden_states = self.conv(hidden_states) else: hidden_states = self.conv(hidden_states) @@ -534,6 +547,9 @@ def __init__( self.time_embedding_norm = time_embedding_norm self.skip_time_act = skip_time_act + linear_cls = nn.Linear if USE_PEFT_BACKEND else LoRACompatibleLinear + conv_cls = nn.Conv2d if USE_PEFT_BACKEND else LoRACompatibleConv + if groups_out is None: groups_out = groups @@ -544,13 +560,13 @@ def __init__( else: self.norm1 = torch.nn.GroupNorm(num_groups=groups, num_channels=in_channels, eps=eps, affine=True) - self.conv1 = LoRACompatibleConv(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + self.conv1 = conv_cls(in_channels, out_channels, kernel_size=3, stride=1, padding=1) if temb_channels is not None: if self.time_embedding_norm == "default": - self.time_emb_proj = LoRACompatibleLinear(temb_channels, out_channels) + self.time_emb_proj = linear_cls(temb_channels, out_channels) elif self.time_embedding_norm == "scale_shift": - self.time_emb_proj = LoRACompatibleLinear(temb_channels, 2 * out_channels) + self.time_emb_proj = linear_cls(temb_channels, 2 * out_channels) elif self.time_embedding_norm == "ada_group" or self.time_embedding_norm == "spatial": self.time_emb_proj = None else: @@ -567,7 +583,7 @@ def __init__( self.dropout = torch.nn.Dropout(dropout) conv_2d_out_channels = conv_2d_out_channels or out_channels - self.conv2 = LoRACompatibleConv(out_channels, conv_2d_out_channels, kernel_size=3, stride=1, padding=1) + self.conv2 = conv_cls(out_channels, conv_2d_out_channels, kernel_size=3, stride=1, padding=1) self.nonlinearity = get_activation(non_linearity) @@ -593,7 +609,7 @@ def __init__( 
self.conv_shortcut = None if self.use_in_shortcut: - self.conv_shortcut = LoRACompatibleConv( + self.conv_shortcut = conv_cls( in_channels, conv_2d_out_channels, kernel_size=1, stride=1, padding=0, bias=conv_shortcut_bias ) @@ -634,12 +650,16 @@ def forward(self, input_tensor, temb, scale: float = 1.0): else self.downsample(hidden_states) ) - hidden_states = self.conv1(hidden_states, scale) + hidden_states = self.conv1(hidden_states, scale) if not USE_PEFT_BACKEND else self.conv1(hidden_states) if self.time_emb_proj is not None: if not self.skip_time_act: temb = self.nonlinearity(temb) - temb = self.time_emb_proj(temb, scale)[:, :, None, None] + temb = ( + self.time_emb_proj(temb, scale)[:, :, None, None] + if not USE_PEFT_BACKEND + else self.time_emb_proj(temb)[:, :, None, None] + ) if temb is not None and self.time_embedding_norm == "default": hidden_states = hidden_states + temb @@ -656,10 +676,12 @@ def forward(self, input_tensor, temb, scale: float = 1.0): hidden_states = self.nonlinearity(hidden_states) hidden_states = self.dropout(hidden_states) - hidden_states = self.conv2(hidden_states, scale) + hidden_states = self.conv2(hidden_states, scale) if not USE_PEFT_BACKEND else self.conv2(hidden_states) if self.conv_shortcut is not None: - input_tensor = self.conv_shortcut(input_tensor, scale) + input_tensor = ( + self.conv_shortcut(input_tensor, scale) if not USE_PEFT_BACKEND else self.conv_shortcut(input_tensor) + ) output_tensor = (input_tensor + hidden_states) / self.output_scale_factor diff --git a/src/diffusers/models/transformer_2d.py b/src/diffusers/models/transformer_2d.py index c96aef65f339..35111f66e959 100644 --- a/src/diffusers/models/transformer_2d.py +++ b/src/diffusers/models/transformer_2d.py @@ -22,6 +22,7 @@ from ..models.embeddings import ImagePositionalEmbeddings from ..utils import BaseOutput, deprecate from .attention import BasicTransformerBlock +from .constants import USE_PEFT_BACKEND from .embeddings import PatchEmbed from .lora import LoRACompatibleConv, LoRACompatibleLinear from .modeling_utils import ModelMixin @@ -100,6 +101,9 @@ def __init__( self.attention_head_dim = attention_head_dim inner_dim = num_attention_heads * attention_head_dim + conv_cls = nn.Conv2d if USE_PEFT_BACKEND else LoRACompatibleConv + linear_cls = nn.Linear if USE_PEFT_BACKEND else LoRACompatibleLinear + # 1. 
Transformer2DModel can process both standard continuous images of shape `(batch_size, num_channels, width, height)` as well as quantized image embeddings of shape `(batch_size, num_image_vectors)` # Define whether input is continuous or discrete depending on configuration self.is_input_continuous = (in_channels is not None) and (patch_size is None) @@ -139,9 +143,9 @@ def __init__( self.norm = torch.nn.GroupNorm(num_groups=norm_num_groups, num_channels=in_channels, eps=1e-6, affine=True) if use_linear_projection: - self.proj_in = LoRACompatibleLinear(in_channels, inner_dim) + self.proj_in = linear_cls(in_channels, inner_dim) else: - self.proj_in = LoRACompatibleConv(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) + self.proj_in = conv_cls(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) elif self.is_input_vectorized: assert sample_size is not None, "Transformer2DModel over discrete input must provide sample_size" assert num_vector_embeds is not None, "Transformer2DModel over discrete input must provide num_embed" @@ -197,9 +201,9 @@ def __init__( if self.is_input_continuous: # TODO: should use out_channels for continuous projections if use_linear_projection: - self.proj_out = LoRACompatibleLinear(inner_dim, in_channels) + self.proj_out = linear_cls(inner_dim, in_channels) else: - self.proj_out = LoRACompatibleConv(inner_dim, in_channels, kernel_size=1, stride=1, padding=0) + self.proj_out = conv_cls(inner_dim, in_channels, kernel_size=1, stride=1, padding=0) elif self.is_input_vectorized: self.norm_out = nn.LayerNorm(inner_dim) self.out = nn.Linear(inner_dim, self.num_vector_embeds - 1) @@ -284,13 +288,21 @@ def forward( hidden_states = self.norm(hidden_states) if not self.use_linear_projection: - hidden_states = self.proj_in(hidden_states, scale=lora_scale) + hidden_states = ( + self.proj_in(hidden_states, scale=lora_scale) + if not USE_PEFT_BACKEND + else self.proj_in(hidden_states) + ) inner_dim = hidden_states.shape[1] hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch, height * width, inner_dim) else: inner_dim = hidden_states.shape[1] hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch, height * width, inner_dim) - hidden_states = self.proj_in(hidden_states, scale=lora_scale) + hidden_states = ( + self.proj_in(hidden_states, scale=lora_scale) + if not USE_PEFT_BACKEND + else self.proj_in(hidden_states) + ) elif self.is_input_vectorized: hidden_states = self.latent_image_embedding(hidden_states) @@ -326,9 +338,17 @@ def forward( if self.is_input_continuous: if not self.use_linear_projection: hidden_states = hidden_states.reshape(batch, height, width, inner_dim).permute(0, 3, 1, 2).contiguous() - hidden_states = self.proj_out(hidden_states, scale=lora_scale) + hidden_states = ( + self.proj_out(hidden_states, scale=lora_scale) + if not USE_PEFT_BACKEND + else self.proj_out(hidden_states) + ) else: - hidden_states = self.proj_out(hidden_states, scale=lora_scale) + hidden_states = ( + self.proj_out(hidden_states, scale=lora_scale) + if not USE_PEFT_BACKEND + else self.proj_out(hidden_states) + ) hidden_states = hidden_states.reshape(batch, height, width, inner_dim).permute(0, 3, 1, 2).contiguous() output = hidden_states + residual diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 61fb1620ac28..570e68648d47 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ 
b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -25,11 +25,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - deprecate, - logging, - replace_example_docstring, -) +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . import StableDiffusionPipelineOutput @@ -650,9 +646,12 @@ def __call__( do_classifier_free_guidance = guidance_scale > 1.0 # 3. Encode input prompt - text_encoder_lora_scale = ( - cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None - ) + lora_scale = cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None + + if self.use_peft_backend: + scale_peft_layers(self.text_encoder, lora_scale) + scale_peft_layers(self.unet, lora_scale) + prompt_embeds, negative_prompt_embeds = self.encode_prompt( prompt, device, @@ -661,7 +660,7 @@ def __call__( negative_prompt, prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_prompt_embeds, - lora_scale=text_encoder_lora_scale, + lora_scale=lora_scale, clip_skip=clip_skip, ) # For classifier free guidance, we need to do two forward passes. @@ -742,6 +741,10 @@ def __call__( # Offload all models self.maybe_free_model_hooks() + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + unscale_peft_layers(self.unet, lora_scale) + if not return_dict: return (image, has_nsfw_concept) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index 9f012dcbf0b4..3ee9b44dac4b 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -37,6 +37,8 @@ is_invisible_watermark_available, logging, replace_example_docstring, + scale_peft_layers, + unscale_peft_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -749,9 +751,16 @@ def __call__( do_classifier_free_guidance = guidance_scale > 1.0 # 3. 
Encode input prompt - text_encoder_lora_scale = ( - cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None - ) + lora_scale = cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None + + if self.use_peft_backend: + scale_peft_layers(self.text_encoder, lora_scale) + + if hasattr(self, "text_encoder_2"): + scale_peft_layers(self.text_encoder_2, lora_scale) + + scale_peft_layers(self.unet, lora_scale) + ( prompt_embeds, negative_prompt_embeds, @@ -769,7 +778,7 @@ def __call__( negative_prompt_embeds=negative_prompt_embeds, pooled_prompt_embeds=pooled_prompt_embeds, negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - lora_scale=text_encoder_lora_scale, + lora_scale=lora_scale, clip_skip=clip_skip, ) @@ -894,6 +903,14 @@ def __call__( # Offload all models self.maybe_free_model_hooks() + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + + if hasattr(self, "text_encoder_2"): + unscale_peft_layers(self.text_encoder_2, lora_scale) + + unscale_peft_layers(self.unet, lora_scale) + if not return_dict: return (image,) diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 3cd185e86325..4eb93b04779e 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -83,9 +83,13 @@ from .loading_utils import load_image from .logging import get_logger from .outputs import BaseOutput -from .peft_utils import recurse_remove_peft_layers +from .peft_utils import recurse_remove_peft_layers, scale_peft_layers, unscale_peft_layers from .pil_utils import PIL_INTERPOLATION, make_image_grid, numpy_to_pil, pt_to_pil -from .state_dict_utils import convert_state_dict_to_diffusers, convert_state_dict_to_peft +from .state_dict_utils import ( + convert_state_dict_to_diffusers, + convert_state_dict_to_peft, + convert_unet_state_dict_to_peft, +) logger = get_logger(__name__) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 9b34183ffaac..e61c0025ac5d 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -69,3 +69,21 @@ def recurse_remove_peft_layers(model): torch.cuda.empty_cache() return model + + +def scale_peft_layers(model, scale: float = None): + from peft.tuners.tuners_utils import BaseTunerLayer + + if scale is not None and scale != 1.0: + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + module.scaling[module.active_adapter] *= scale + + +def unscale_peft_layers(model, scale: float = None): + from peft.tuners.tuners_utils import BaseTunerLayer + + if scale is not None and scale != 1.0 and scale != 0.0: + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + module.scaling[module.active_adapter] /= scale diff --git a/src/diffusers/utils/state_dict_utils.py b/src/diffusers/utils/state_dict_utils.py index acc64a5034ec..65f3f90c5ff4 100644 --- a/src/diffusers/utils/state_dict_utils.py +++ b/src/diffusers/utils/state_dict_utils.py @@ -28,6 +28,21 @@ class StateDictType(enum.Enum): DIFFUSERS = "diffusers" +# We need to define a proper mapping for Unet since it uses different output keys than text encoder +# e.g. 
to_q_lora -> q_proj / to_q
+UNET_TO_DIFFUSERS = {
+    ".to_out_lora.up": ".to_out.0.lora_B",
+    ".to_out_lora.down": ".to_out.0.lora_A",
+    ".to_q_lora.down": ".to_q.lora_A",
+    ".to_q_lora.up": ".to_q.lora_B",
+    ".to_k_lora.down": ".to_k.lora_A",
+    ".to_k_lora.up": ".to_k.lora_B",
+    ".to_v_lora.down": ".to_v.lora_A",
+    ".to_v_lora.up": ".to_v.lora_B",
+    ".processor.": ".",
+}
+
+
 DIFFUSERS_TO_PEFT = {
     ".q_proj.lora_linear_layer.up": ".q_proj.lora_B",
     ".q_proj.lora_linear_layer.down": ".q_proj.lora_A",
@@ -105,7 +120,7 @@ def convert_state_dict(state_dict, mapping):
             if pattern in k:
                 new_pattern = mapping[pattern]
                 k = k.replace(pattern, new_pattern)
-                break
+                # break
         converted_state_dict[k] = v

     return converted_state_dict
@@ -182,3 +197,11 @@ def convert_state_dict_to_diffusers(state_dict, original_type=None, **kwargs):
         mapping = DIFFUSERS_STATE_DICT_MAPPINGS[original_type]

     return convert_state_dict(state_dict, mapping)
+
+
+def convert_unet_state_dict_to_peft(state_dict):
+    r"""
+    Converts a UNet LoRA state dict (attention-processor format) to PEFT format, i.e. remaps the
+    `to_*_lora.up/down` keys to `lora_B`/`lora_A` and drops the `.processor.` infix.
+    """
+    mapping = UNET_TO_DIFFUSERS
+    return convert_state_dict(state_dict, mapping)
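A hypothetical round trip through the mapping above (the `break` in `convert_state_dict` is commented out precisely so that several patterns — here `.to_q_lora.down` and `.processor.` — can rewrite the same key; the key names below are illustrative, not taken from a real checkpoint):

    import torch

    from diffusers.utils import convert_unet_state_dict_to_peft

    unet_state_dict = {
        "down_blocks.0.attentions.0.transformer_blocks.0.attn1.processor.to_q_lora.down.weight": torch.randn(4, 320),
        "down_blocks.0.attentions.0.transformer_blocks.0.attn1.processor.to_q_lora.up.weight": torch.randn(320, 4),
    }
    peft_state_dict = convert_unet_state_dict_to_peft(unet_state_dict)
    # keys are now:
    #   down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.lora_A.weight
    #   down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.lora_B.weight

From 8759f55af136e440ea63bfd274e209ac700168a3 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Mon, 25 Sep 2023 15:24:07 +0000
Subject: [PATCH 002/134] add tests and fix previous failing tests

---
 src/diffusers/loaders.py               |   3 +-
 src/diffusers/models/modeling_utils.py | 130 +++++++++++++++++++++++++
 src/diffusers/utils/__init__.py        |   8 +-
 src/diffusers/utils/peft_utils.py      |  29 +++++-
 tests/lora/test_lora_layers_peft.py    |  87 +++++++++++++++--
 5 files changed, 246 insertions(+), 11 deletions(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index ad88d7ba686a..5153f4418f3f 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -1480,10 +1480,11 @@ def load_lora_into_unet(
         warn_message = "You have saved the LoRA weights using the old format. To convert the old LoRA weights to the new format, you can first load them in a dictionary and then create a new dictionary like the following: `new_state_dict = {f'unet.{module_name}': params for module_name, params in old_state_dict.items()}`."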
logger.warn(warn_message)

-        if cls.use_peft_backend:
+        if cls.use_peft_backend and len(state_dict.keys()) > 0:
             from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict

             state_dict = convert_unet_state_dict_to_peft(state_dict)
+
             target_modules = []
             ranks = []
             for key in state_dict.keys():
diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py
index 67746ebacef2..b766c97a6a2e 100644
--- a/src/diffusers/models/modeling_utils.py
+++ b/src/diffusers/models/modeling_utils.py
@@ -32,10 +32,12 @@
     DIFFUSERS_CACHE,
     FLAX_WEIGHTS_NAME,
     HF_HUB_OFFLINE,
+    MIN_PEFT_VERSION,
     SAFETENSORS_WEIGHTS_NAME,
     WEIGHTS_NAME,
     _add_variant,
     _get_model_file,
+    check_peft_version,
     deprecate,
     is_accelerate_available,
     is_torch_version,
@@ -187,6 +189,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
     _automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"]
     _supports_gradient_checkpointing = False
     _keys_to_ignore_on_load_unexpected = None
+    _hf_peft_config_loaded = False

     def __init__(self):
         super().__init__()
@@ -292,6 +295,133 @@ def disable_xformers_memory_efficient_attention(self):
         """
         self.set_use_memory_efficient_attention_xformers(False)

+    def add_adapter(self, adapter_config, adapter_name: Optional[str] = None) -> None:
+        r"""
+        If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
+        official documentation: https://huggingface.co/docs/peft
+
+        Adds a fresh new adapter to the current model for training purposes. If no adapter name is passed, a default
+        name is assigned to the adapter to follow the convention of the PEFT library (in PEFT we use "default" as the
+        default adapter name).
+
+        Args:
+            adapter_config (`~peft.PeftConfig`):
+                The configuration of the adapter to add; supported adapters are non-prefix-tuning and adaption-prompt
+                methods.
+            adapter_name (`str`, *optional*, defaults to `"default"`):
+                The name of the adapter to add. If no name is passed, a default name is assigned to the adapter.
+        """
+        check_peft_version(min_version=MIN_PEFT_VERSION)
+
+        from peft import PeftConfig, inject_adapter_in_model
+
+        adapter_name = adapter_name or "default"
+
+        if not self._hf_peft_config_loaded:
+            self._hf_peft_config_loaded = True
+        elif adapter_name in self.peft_config:
+            raise ValueError(f"Adapter with name {adapter_name} already exists. Please use a different name.")
+
+        if not isinstance(adapter_config, PeftConfig):
+            raise ValueError(
+                f"adapter_config should be an instance of PeftConfig. Got {type(adapter_config)} instead."
+            )
+
+        # Unlike transformers, here we don't need to retrieve the name_or_path of the unet as the loading logic is
+        # handled by the `load_lora_layers` or `LoraLoaderMixin`.
+        # @patrickvonplaten @sayakpaul do we have an equivalent of `model.config.name_or_path` in diffusers?
+        adapter_config.base_model_name_or_path = None
+        inject_adapter_in_model(adapter_config, self, adapter_name)
+        self.set_adapter(adapter_name)
+
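A minimal usage sketch for the method above, reusing the `LoraConfig` that the tests added later in this series construct (the checkpoint id is illustrative):

    from peft import LoraConfig

    from diffusers import UNet2DConditionModel

    unet = UNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="unet")
    unet_lora_config = LoraConfig(r=4, lora_alpha=4, target_modules=["to_q", "to_k", "to_v", "to_out.0"])
    # Injects fresh LoRA layers under the name "default" and activates them.
    unet.add_adapter(unet_lora_config)

+    def set_adapter(self, adapter_name: str) -> None:
+        """
+        If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
+        official documentation: https://huggingface.co/docs/peft
+
+        Sets a specific adapter by forcing the model to use that adapter and disabling the other adapters.
+
+        Args:
+            adapter_name (`str`):
+                The name of the adapter to set.
+        """
+        check_peft_version(min_version=MIN_PEFT_VERSION)
+
+        if not self._hf_peft_config_loaded:
+            raise ValueError("No adapter loaded.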
Please load an adapter first.")
+        elif adapter_name not in self.peft_config:
+            raise ValueError(
+                f"Adapter with name {adapter_name} not found. Please pass the correct adapter name among {list(self.peft_config.keys())}"
+            )
+
+        from peft.tuners.tuners_utils import BaseTunerLayer
+
+        _adapters_has_been_set = False
+
+        for _, module in self.named_modules():
+            if isinstance(module, BaseTunerLayer):
+                module.active_adapter = adapter_name
+                _adapters_has_been_set = True
+
+        if not _adapters_has_been_set:
+            raise ValueError(
+                "Did not succeed in setting the adapter. Please make sure you are using a model that supports adapters."
+            )
+
+    def disable_adapters(self) -> None:
+        r"""
+        If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
+        official documentation: https://huggingface.co/docs/peft
+
+        Disable all adapters that are attached to the model. This leads to inferring with the base model only.
+        """
+        check_peft_version(min_version=MIN_PEFT_VERSION)
+
+        if not self._hf_peft_config_loaded:
+            raise ValueError("No adapter loaded. Please load an adapter first.")
+
+        from peft.tuners.tuners_utils import BaseTunerLayer
+
+        for _, module in self.named_modules():
+            if isinstance(module, BaseTunerLayer):
+                module.disable_adapters = True
+
+    def enable_adapters(self) -> None:
+        """
+        If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
+        official documentation: https://huggingface.co/docs/peft
+
+        Enable adapters that are attached to the model. The model will use `self.active_adapter()`
+        """
+        check_peft_version(min_version=MIN_PEFT_VERSION)
+
+        if not self._hf_peft_config_loaded:
+            raise ValueError("No adapter loaded. Please load an adapter first.")
+
+        from peft.tuners.tuners_utils import BaseTunerLayer
+
+        for _, module in self.named_modules():
+            if isinstance(module, BaseTunerLayer):
+                module.disable_adapters = False
+
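Continuing the hypothetical `unet` from the sketch above, the two toggles just defined (and the accessor below) allow with/without-LoRA comparisons without unloading any weights:

    unet.disable_adapters()       # forward passes temporarily use the base weights only
    # ... generate a reference image here ...
    unet.enable_adapters()        # re-activate the injected LoRA layers
    print(unet.active_adapter())  # -> "default"

+    def active_adapter(self) -> str:
+        """
+        If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
+        official documentation: https://huggingface.co/docs/peft
+
+        Gets the current active adapter of the model.
+        """
+        check_peft_version(min_version=MIN_PEFT_VERSION)
+
+        if not self._hf_peft_config_loaded:
+            raise ValueError("No adapter loaded.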
Please load an adapter first.")
+
+        from peft.tuners.tuners_utils import BaseTunerLayer
+
+        for _, module in self.named_modules():
+            if isinstance(module, BaseTunerLayer):
+                return module.active_adapter
+
     def save_pretrained(
         self,
         save_directory: Union[str, os.PathLike],
diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py
index 4eb93b04779e..6aa88f54aa99 100644
--- a/src/diffusers/utils/__init__.py
+++ b/src/diffusers/utils/__init__.py
@@ -83,7 +83,13 @@
 from .loading_utils import load_image
 from .logging import get_logger
 from .outputs import BaseOutput
-from .peft_utils import recurse_remove_peft_layers, scale_peft_layers, unscale_peft_layers
+from .peft_utils import (
+    MIN_PEFT_VERSION,
+    check_peft_version,
+    recurse_remove_peft_layers,
+    scale_peft_layers,
+    unscale_peft_layers,
+)
 from .pil_utils import PIL_INTERPOLATION, make_image_grid, numpy_to_pil, pt_to_pil
 from .state_dict_utils import (
     convert_state_dict_to_diffusers,
     convert_state_dict_to_peft,
     convert_unet_state_dict_to_peft,
 )
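The scaling helpers re-exported above are symmetric multiply/divide operations on every active PEFT adapter; a minimal sketch (`text_encoder` stands in for any model with PEFT LoRA layers attached):

    from diffusers.utils import scale_peft_layers, unscale_peft_layers

    scale_peft_layers(text_encoder, scale=0.5)    # multiply every active adapter's scaling by 0.5
    # ... run the forward pass ...
    unscale_peft_layers(text_encoder, scale=0.5)  # ... then divide it back out

diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py
index e61c0025ac5d..3991052519d4 100644
--- a/src/diffusers/utils/peft_utils.py
+++ b/src/diffusers/utils/peft_utils.py
@@ -14,13 +14,20 @@
 """
 PEFT utilities: Utilities related to peft library
 """
-from .import_utils import is_torch_available
+import importlib
+
+from packaging import version
+
+from .import_utils import is_peft_available, is_torch_available


 if is_torch_available():
     import torch


+MIN_PEFT_VERSION = "0.5.0"
+
+
 def recurse_remove_peft_layers(model):
     r"""
     Recursively replace all instances of `LoraLayer` with corresponding new layers in `model`.
@@ -87,3 +94,23 @@ def unscale_peft_layers(model, scale: float = None):
         for module in model.modules():
             if isinstance(module, BaseTunerLayer):
                 module.scaling[module.active_adapter] /= scale
+
+
+def check_peft_version(min_version: str) -> None:
+    r"""
+    Checks if the version of PEFT is compatible.
+
+    Args:
+        min_version (`str`):
+            The minimum version of PEFT to check against.
+    """
+    if not is_peft_available():
+        raise ValueError("PEFT is not installed.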
Please install it with `pip install peft`") + + is_peft_version_compatible = version.parse(importlib.metadata.version("peft")) > version.parse(min_version) + + if not is_peft_version_compatible: + raise ValueError( + f"The version of PEFT you are using is not compatible, please use a version that is greater" + f" than {min_version}" + ) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 1862437fce88..8f08b86da6f5 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -94,6 +94,10 @@ def get_dummy_components(self): r=4, lora_alpha=4, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"], init_lora_weights=False ) + unet_lora_config = LoraConfig( + r=4, lora_alpha=4, target_modules=["to_q", "to_k", "to_v", "to_out.0"], init_lora_weights=False + ) + unet_lora_attn_procs, unet_lora_layers = create_unet_lora_layers(unet) if self.has_two_text_encoders: @@ -120,7 +124,7 @@ def get_dummy_components(self): "unet_lora_layers": unet_lora_layers, "unet_lora_attn_procs": unet_lora_attn_procs, } - return pipeline_components, lora_components, text_lora_config + return pipeline_components, lora_components, text_lora_config, unet_lora_config def get_dummy_inputs(self, with_generator=True): batch_size = 1 @@ -166,7 +170,7 @@ def test_simple_inference(self): """ Tests a simple inference and makes sure it works as expected """ - components, _, _ = self.get_dummy_components() + components, _, _, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -180,7 +184,7 @@ def test_simple_inference_with_text_lora(self): Tests a simple inference with lora attached on the text encoder and makes sure it works as expected """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -208,7 +212,7 @@ def test_simple_inference_with_text_lora_and_scale(self): Tests a simple inference with lora attached on the text encoder + scale argument and makes sure it works as expected """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -252,7 +256,7 @@ def test_simple_inference_with_text_lora_fused(self): Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model and makes sure it works as expected """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -289,7 +293,7 @@ def test_simple_inference_with_text_lora_unloaded(self): Tests a simple inference with lora attached to text encoder, then unloads the lora weights and makes sure it works as expected """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -327,7 +331,7 @@ def test_simple_inference_with_text_lora_save_load(self): """ Tests a simple usecase where users could use 
saving utilities for LoRA. """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -387,7 +391,7 @@ def test_simple_inference_save_pretrained(self): """ Tests a simple usecase where users could use saving utilities for LoRA through save_pretrained """ - components, _, text_lora_config = self.get_dummy_components() + components, _, text_lora_config, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(self.torch_device) pipe.set_progress_bar_config(disable=None) @@ -431,6 +435,73 @@ def test_simple_inference_save_pretrained(self): "Loading from saved checkpoints should give same results.", ) + def test_simple_inference_with_text_unet_lora_save_load(self): + """ + Tests a simple usecase where users could use saving utilities for LoRA for Unet + text encoder + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) + + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + + with tempfile.TemporaryDirectory() as tmpdirname: + text_encoder_state_dict = get_peft_model_state_dict(pipe.text_encoder) + unet_state_dict = get_peft_model_state_dict(pipe.unet) + if self.has_two_text_encoders: + text_encoder_2_state_dict = get_peft_model_state_dict(pipe.text_encoder_2) + + self.pipeline_class.save_lora_weights( + save_directory=tmpdirname, + text_encoder_lora_layers=text_encoder_state_dict, + text_encoder_2_lora_layers=text_encoder_2_state_dict, + unet_lora_layers=unet_state_dict, + safe_serialization=False, + ) + else: + self.pipeline_class.save_lora_weights( + save_directory=tmpdirname, + text_encoder_lora_layers=text_encoder_state_dict, + unet_lora_layers=unet_state_dict, + safe_serialization=False, + ) + + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) + pipe.unload_lora_weights() + + pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin")) + + images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + self.assertTrue( + np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3), + "Loading from saved checkpoints should give same results.", + 
) + class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): pipeline_class = StableDiffusionPipeline From c90aedc5cbbd8bbdf2d400b8aa66fd9a7c449441 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 25 Sep 2023 15:42:27 +0000 Subject: [PATCH 003/134] fix CI --- src/diffusers/models/resnet.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/diffusers/models/resnet.py b/src/diffusers/models/resnet.py index be80157b93b6..914a31951836 100644 --- a/src/diffusers/models/resnet.py +++ b/src/diffusers/models/resnet.py @@ -166,22 +166,17 @@ def forward(self, hidden_states, output_size=None, scale: float = 1.0): hidden_states = hidden_states.to(dtype) # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed - if self.use_conv and not USE_PEFT_BACKEND: + if self.use_conv: if self.name == "conv": - if isinstance(self.conv, LoRACompatibleConv): + if isinstance(self.conv, LoRACompatibleConv) and not USE_PEFT_BACKEND: hidden_states = self.conv(hidden_states, scale) else: hidden_states = self.conv(hidden_states) else: - if isinstance(self.Conv2d_0, LoRACompatibleConv): + if isinstance(self.Conv2d_0, LoRACompatibleConv) and not USE_PEFT_BACKEND: hidden_states = self.Conv2d_0(hidden_states, scale) else: hidden_states = self.Conv2d_0(hidden_states) - else: - if self.name == "conv": - hidden_states = self.conv(hidden_states) - else: - hidden_states = self.Conv2d_0(hidden_states) return hidden_states From 3002ea3f51bfc003699f1c2b42746a01b037f227 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 25 Sep 2023 16:17:12 +0000 Subject: [PATCH 004/134] add tests + v1 `PeftLayerScaler` --- src/diffusers/loaders.py | 10 + .../pipeline_stable_diffusion.py | 186 +++++++++--------- src/diffusers/utils/__init__.py | 2 + src/diffusers/utils/generic.py | 36 ++++ src/diffusers/utils/peft_utils.py | 21 ++ tests/lora/test_lora_layers_peft.py | 46 +++++ 6 files changed, 209 insertions(+), 92 deletions(-) create mode 100644 src/diffusers/utils/generic.py diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 458623a94827..fca737ddb0f9 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -2214,6 +2214,16 @@ def unfuse_text_encoder_lora(text_encoder): self.num_fused_loras -= 1 + @property + def lora_modules_to_scale(self): + """ + Returns the list of the LoRA modules to scale + """ + lora_modules = [self.text_encoder, self.unet] + if hasattr(self, "text_encoder_2"): + lora_modules.append(self.text_encoder_2) + return lora_modules + class FromSingleFileMixin: """ diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 570e68648d47..565872ebbaec 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -25,7 +25,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import ContextManagers, PeftLayerScaler, deprecate, logging, replace_example_docstring from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . import StableDiffusionPipelineOutput @@ -625,6 +625,8 @@ def __call__( # 0. 
Default height and width to unet height = height or self.unet.config.sample_size * self.vae_scale_factor width = width or self.unet.config.sample_size * self.vae_scale_factor + # to deal with lora scaling and other possible forward hooks + forward_context_managers = [] # 1. Check inputs. Raise error if not correct self.check_inputs( @@ -646,104 +648,104 @@ def __call__( do_classifier_free_guidance = guidance_scale > 1.0 # 3. Encode input prompt - lora_scale = cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None - if self.use_peft_backend: - scale_peft_layers(self.text_encoder, lora_scale) - scale_peft_layers(self.unet, lora_scale) - - prompt_embeds, negative_prompt_embeds = self.encode_prompt( - prompt, - device, - num_images_per_prompt, - do_classifier_free_guidance, - negative_prompt, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - lora_scale=lora_scale, - clip_skip=clip_skip, - ) - # For classifier free guidance, we need to do two forward passes. - # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - if do_classifier_free_guidance: - prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds]) - - # 4. Prepare timesteps - self.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps = self.scheduler.timesteps - - # 5. Prepare latent variables - num_channels_latents = self.unet.config.in_channels - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - num_channels_latents, - height, - width, - prompt_embeds.dtype, - device, - generator, - latents, - ) + # Make sure we pop it so that it is used nowhere later + lora_scale = cross_attention_kwargs.pop("scale", None) if cross_attention_kwargs is not None else None - # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline - extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - - # 7. Denoising loop - num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order - with self.progress_bar(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - - # predict the noise residual - noise_pred = self.unet( - latent_model_input, - t, - encoder_hidden_states=prompt_embeds, - cross_attention_kwargs=cross_attention_kwargs, - return_dict=False, - )[0] - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - - if do_classifier_free_guidance and guidance_rescale > 0.0: - # Based on 3.4. 
in https://arxiv.org/pdf/2305.08891.pdf - noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale) - - # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - progress_bar.update() - if callback is not None and i % callback_steps == 0: - callback(i, t, latents) - - if not output_type == "latent": - image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0] - image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) + scaling_context_manager = PeftLayerScaler(self.lora_modules_to_scale, lora_scale) + forward_context_managers.append(scaling_context_manager) else: - image = latents - has_nsfw_concept = None + lora_scale = cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None - if has_nsfw_concept is None: - do_denormalize = [True] * image.shape[0] - else: - do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] + with ContextManagers(forward_context_managers): + prompt_embeds, negative_prompt_embeds = self.encode_prompt( + prompt, + device, + num_images_per_prompt, + do_classifier_free_guidance, + negative_prompt, + prompt_embeds=prompt_embeds, + negative_prompt_embeds=negative_prompt_embeds, + lora_scale=lora_scale, + clip_skip=clip_skip, + ) + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + if do_classifier_free_guidance: + prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds]) + + # 4. Prepare timesteps + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = self.scheduler.timesteps + + # 5. Prepare latent variables + num_channels_latents = self.unet.config.in_channels + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + prompt_embeds.dtype, + device, + generator, + latents, + ) - image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) + # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + + # 7. Denoising loop + num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order + with self.progress_bar(total=num_inference_steps) as progress_bar: + for i, t in enumerate(timesteps): + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + noise_pred = self.unet( + latent_model_input, + t, + encoder_hidden_states=prompt_embeds, + cross_attention_kwargs=cross_attention_kwargs, + return_dict=False, + )[0] + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + if do_classifier_free_guidance and guidance_rescale > 0.0: + # Based on 3.4. 
in https://arxiv.org/pdf/2305.08891.pdf + noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] + + # call the callback, if provided + if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): + progress_bar.update() + if callback is not None and i % callback_steps == 0: + callback(i, t, latents) + + if not output_type == "latent": + image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0] + image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) + else: + image = latents + has_nsfw_concept = None + + if has_nsfw_concept is None: + do_denormalize = [True] * image.shape[0] + else: + do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] - # Offload all models - self.maybe_free_model_hooks() + image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) - if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) - unscale_peft_layers(self.unet, lora_scale) + # Offload all models + self.maybe_free_model_hooks() if not return_dict: return (image, has_nsfw_concept) diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 6aa88f54aa99..fc2ea8a641cc 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -35,6 +35,7 @@ from .doc_utils import replace_example_docstring from .dynamic_modules_utils import get_class_from_dynamic_module from .export_utils import export_to_gif, export_to_obj, export_to_ply, export_to_video +from .generic import ContextManagers from .hub_utils import ( HF_HUB_OFFLINE, PushToHubMixin, @@ -85,6 +86,7 @@ from .outputs import BaseOutput from .peft_utils import ( MIN_PEFT_VERSION, + PeftLayerScaler, check_peft_version, recurse_remove_peft_layers, scale_peft_layers, diff --git a/src/diffusers/utils/generic.py b/src/diffusers/utils/generic.py new file mode 100644 index 000000000000..8e562b2ec919 --- /dev/null +++ b/src/diffusers/utils/generic.py @@ -0,0 +1,36 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Generic utilities +""" +from contextlib import ExitStack +from typing import ContextManager, List + + +class ContextManagers: + """ + Wrapper for `contextlib.ExitStack` which enters a collection of context managers. Adaptation of `ContextManagers` + in the `fastcore` library - also exists in transformers library. 
+ """ + + def __init__(self, context_managers: List[ContextManager]): + self.context_managers = context_managers + self.stack = ExitStack() + + def __enter__(self): + for context_manager in self.context_managers: + self.stack.enter_context(context_manager) + + def __exit__(self, *args, **kwargs): + self.stack.__exit__(*args, **kwargs) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 3991052519d4..c18ff0364bc3 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -15,6 +15,7 @@ PEFT utilities: Utilities related to peft library """ import importlib +from typing import List from packaging import version @@ -96,6 +97,26 @@ def unscale_peft_layers(model, scale: float = None): module.scaling[module.active_adapter] /= scale +class PeftLayerScaler: + r""" + A custom context manager that scale / unscale PEFT layers before and after the forward pass. + """ + + def __init__(self, modules_to_scale: List[torch.nn.Module], scale: float = None): + self.modules_to_scale = modules_to_scale + self.scale = scale + + def __enter__(self, *args, **kwargs): + if self.scale is not None and self.scale != 1.0: + for submodule in self.modules_to_scale: + scale_peft_layers(submodule, self.scale) + + def __exit__(self, *args, **kwargs): + if self.scale is not None and self.scale != 1.0 and self.scale != 0.0: + for submodule in self.modules_to_scale: + unscale_peft_layers(submodule, self.scale) + + def check_peft_version(min_version: str) -> None: r""" Checks if the version of PEFT is compatible. diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 8f08b86da6f5..414372e0f336 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -502,6 +502,52 @@ def test_simple_inference_with_text_unet_lora_save_load(self): "Loading from saved checkpoints should give same results.", ) + def test_simple_inference_with_text_unet_lora_and_scale(self): + """ + Tests a simple inference with lora attached on the text encoder + Unet + scale argument + and makes sure it works as expected + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) + + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue( + not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output" + ) + + output_lora_scale = pipe( + **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.5} + ).images + self.assertTrue( + not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3), + "Lora + scale should change the output", + ) + + output_lora_0_scale = pipe( + **inputs, 
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 8f08b86da6f5..414372e0f336 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -502,6 +502,52 @@ def test_simple_inference_with_text_unet_lora_save_load(self):
             "Loading from saved checkpoints should give same results.",
         )
 
+    def test_simple_inference_with_text_unet_lora_and_scale(self):
+        """
+        Tests a simple inference with lora attached to the text encoder + Unet + scale argument
+        and makes sure it works as expected
+        """
+        components, _, text_lora_config, unet_lora_config = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(self.torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)
+
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
+        self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
+
+        pipe.text_encoder.add_adapter(text_lora_config)
+        pipe.unet.add_adapter(unet_lora_config)
+        self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
+        self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
+
+        if self.has_two_text_encoders:
+            pipe.text_encoder_2.add_adapter(text_lora_config)
+            self.assertTrue(
+                self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
+            )
+
+        output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
+        self.assertTrue(
+            not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
+        )
+
+        output_lora_scale = pipe(
+            **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.5}
+        ).images
+        self.assertTrue(
+            not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3),
+            "Lora + scale should change the output",
+        )
+
+        output_lora_0_scale = pipe(
+            **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.0}
+        ).images
+        self.assertTrue(
+            np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3),
+            "Lora + 0 scale should lead to same result as no LoRA",
+        )
+
 
 class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
     pipeline_class = StableDiffusionPipeline

From 64ca2bb63053094bfc9d4e72d9528cd9d1607622 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Mon, 25 Sep 2023 16:37:51 +0000
Subject: [PATCH 005/134] style

---
 src/diffusers/utils/peft_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py
index 1d8a86a56b53..c10c446a673b 100644
--- a/src/diffusers/utils/peft_utils.py
+++ b/src/diffusers/utils/peft_utils.py
@@ -21,11 +21,13 @@
 
 from .import_utils import is_peft_available, is_torch_available
 
+
 if is_torch_available():
     import torch
 
 MIN_PEFT_VERSION = "0.5.0"
 
+
 def recurse_remove_peft_layers(model):
     if is_torch_available():
         import torch

From f62e506de80a2865e4ea3b7e22101421b73078e9 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Mon, 25 Sep 2023 16:59:17 +0000
Subject: [PATCH 006/134] add scale retrieving mechanism system

---
 src/diffusers/loaders.py            |  9 +++++-
 src/diffusers/utils/peft_utils.py   | 45 +++++++++++++++++++++++++++++
 tests/lora/test_lora_layers_peft.py |  5 ++++
 3 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index fca737ddb0f9..7cbc2974e676 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -2219,7 +2219,14 @@ def lora_modules_to_scale(self):
         """
         Returns the list of the LoRA modules to scale
         """
-        lora_modules = [self.text_encoder, self.unet]
+        lora_modules = []
+
+        if hasattr(self, "text_encoder"):
+            lora_modules.append(self.text_encoder)
+
+        if hasattr(self, "unet"):
+            lora_modules.append(self.unet)
+
         if hasattr(self, "text_encoder_2"):
             lora_modules.append(self.text_encoder_2)
         return lora_modules
diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py
index c10c446a673b..c14cd9940ead 100644
--- a/src/diffusers/utils/peft_utils.py
+++ b/src/diffusers/utils/peft_utils.py
@@ -82,21 +82,66 @@ def recurse_remove_peft_layers(model):
 
 
 def scale_peft_layers(model, scale: float = None):
+    r"""
+    Scale peft layers - loops over the modules of the model and scales the layers that are of type `BaseTunerLayer`.
+    We also store the original scale factor in case we multiply it by zero.
+
+    Args:
+        model (`torch.nn.Module`):
+            The model to scale.
+        scale (`float`, *optional*):
+            The scale factor to use.
+    """
     from peft.tuners.tuners_utils import BaseTunerLayer
 
     if scale is not None and scale != 1.0:
         for module in model.modules():
             if isinstance(module, BaseTunerLayer):
+                original_scale = module.scaling[module.active_adapter]
+
+                # Store the previous scale in case we multiply it by zero
+                if "_hf_peft_original_scales" not in module.scaling:
+                    module.scaling["_hf_peft_original_scales"] = {module.active_adapter: original_scale}
+                else:
+                    module.scaling["_hf_peft_original_scales"][module.active_adapter] = original_scale
+
                 module.scaling[module.active_adapter] *= scale
 
 
 def unscale_peft_layers(model, scale: float = None):
+    r"""
+    Unscale peft layers - in case the module's scaling has been zeroed out by a 0.0 scale factor, we retrieve the
+    previous scale and restore it. Otherwise, assuming the user passes the same scale factor, we just divide by the
+    scale factor.
+
+    Args:
+        model (`torch.nn.Module`):
+            The model to unscale.
+        scale (`float`, *optional*):
+            The scale factor to use. If 0.0 is passed, we retrieve the original scale factor. To retrieve the
+            original factor, the user first needs to call `scale_peft_layers` with the same scale factor.
+    """
     from peft.tuners.tuners_utils import BaseTunerLayer
 
     if scale is not None and scale != 1.0 and scale != 0.0:
         for module in model.modules():
             if isinstance(module, BaseTunerLayer):
                 module.scaling[module.active_adapter] /= scale
+    elif scale is not None and scale == 0.0:
+        for module in model.modules():
+            if isinstance(module, BaseTunerLayer):
+                if "_hf_peft_original_scales" not in module.scaling:
+                    raise ValueError(
+                        "The layer has not been scaled, cannot unscale it - please call first `scale_peft_layers`"
+                    )
+
+                original_scale = module.scaling["_hf_peft_original_scales"][module.active_adapter]
+                module.scaling[module.active_adapter] = original_scale
+
+                del module.scaling["_hf_peft_original_scales"][module.active_adapter]
+
+                # Clean up ..
+                if len(module.scaling["_hf_peft_original_scales"]) == 0:
+                    del module.scaling["_hf_peft_original_scales"]
 
 
 class PeftLayerScaler:
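The stash under the `"_hf_peft_original_scales"` key is what makes a scale of 0.0 reversible: once a scaling factor has been multiplied by zero it cannot be recovered by division, so `unscale_peft_layers` restores the stored value instead. A toy sketch of the round trip, illustrative only and assuming `model` holds PEFT `BaseTunerLayer` modules whose active scaling is 2.0:

from diffusers.utils import scale_peft_layers, unscale_peft_layers

scale_peft_layers(model, scale=0.0)    # scaling -> 0.0, the original 2.0 is stashed
# ... a forward pass here behaves as if no LoRA were loaded ...
unscale_peft_layers(model, scale=0.0)  # scaling -> 2.0, restored from the stash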
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 414372e0f336..18b0199ebc93 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -548,6 +548,11 @@ def test_simple_inference_with_text_unet_lora_and_scale(self):
             "Lora + 0 scale should lead to same result as no LoRA",
         )
 
+        self.assertTrue(
+            pipe.text_encoder.text_model.encoder.layers[0].self_attn.q_proj.scaling["default"] == 1.0,
+            "The scaling parameter has not been correctly restored!",
+        )
+
 
 class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
     pipeline_class = StableDiffusionPipeline

From 48842c0a3c621e9b378f374fc457ffb20e612d26 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Mon, 25 Sep 2023 17:01:17 +0000
Subject: [PATCH 007/134] fix CI

---
 src/diffusers/utils/peft_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py
index c14cd9940ead..9a8f5d4ada2f 100644
--- a/src/diffusers/utils/peft_utils.py
+++ b/src/diffusers/utils/peft_utils.py
@@ -149,7 +149,7 @@ class PeftLayerScaler:
     A custom context manager that scales / unscales PEFT layers before and after the forward pass.
""" - def __init__(self, modules_to_scale: List[torch.nn.Module], scale: float = None): + def __init__(self, modules_to_scale: List["torch.nn.Module"], scale: float = None): self.modules_to_scale = modules_to_scale self.scale = scale From 1fb4aa29e35c90ecaaeae1e2197d7aaea003edc9 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 25 Sep 2023 17:03:42 +0000 Subject: [PATCH 008/134] up --- src/diffusers/utils/peft_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 9a8f5d4ada2f..4a662752820e 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -159,7 +159,7 @@ def __enter__(self, *args, **kwargs): scale_peft_layers(submodule, self.scale) def __exit__(self, *args, **kwargs): - if self.scale is not None and self.scale != 1.0 and self.scale != 0.0: + if self.scale is not None and self.scale != 1.0: for submodule in self.modules_to_scale: unscale_peft_layers(submodule, self.scale) From 4c803f666c6c6d986ea16fdc3f1b83ea82cc76a3 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 08:56:17 +0000 Subject: [PATCH 009/134] up --- src/diffusers/utils/peft_utils.py | 39 ++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 4a662752820e..b9a238201282 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -97,15 +97,21 @@ def scale_peft_layers(model, scale: float = None): if scale is not None and scale != 1.0: for module in model.modules(): if isinstance(module, BaseTunerLayer): - original_scale = module.scaling[module.active_adapter] + # To deal with previous PEFT versions + active_adapters = module.active_adapter + if isinstance(active_adapters, str): + active_adapters = [active_adapters] - # Store the previous scale in case we multiply it by zero - if "_hf_peft_original_scales" not in module.scaling: - module.scaling["_hf_peft_original_scales"] = {module.active_adapter: original_scale} - else: - module.scaling["_hf_peft_original_scales"][module.active_adapter] = original_scale + for active_adapter in active_adapters: + original_scale = module.scaling[active_adapter] + + # Store the previous scale in case we multiply it by zero + if "_hf_peft_original_scales" not in module.scaling: + module.scaling["_hf_peft_original_scales"] = {active_adapter: original_scale} + else: + module.scaling["_hf_peft_original_scales"][active_adapter] = original_scale - module.scaling[module.active_adapter] *= scale + module.scaling[active_adapter] *= scale def unscale_peft_layers(model, scale: float = None): @@ -125,7 +131,13 @@ def unscale_peft_layers(model, scale: float = None): if scale is not None and scale != 1.0 and scale != 0.0: for module in model.modules(): if isinstance(module, BaseTunerLayer): - module.scaling[module.active_adapter] /= scale + # To deal with previous PEFT versions + active_adapters = module.active_adapter + if isinstance(active_adapters, str): + active_adapters = [active_adapters] + + for active_adapter in active_adapters: + module.scaling[active_adapter] /= scale elif scale is not None and scale == 0.0: for module in model.modules(): if isinstance(module, BaseTunerLayer): @@ -133,11 +145,16 @@ def unscale_peft_layers(model, scale: float = None): raise ValueError( "The layer has not been scaled, cannot unscale it - please call first `scale_peft_layers`" ) + # To deal with previous PEFT versions + 
active_adapters = module.active_adapter + if isinstance(active_adapters, str): + active_adapters = [active_adapters] - original_scale = module.scaling["_hf_peft_original_scales"][module.active_adapter] - module.scaling[module.active_adapter] = original_scale + for active_adapter in active_adapters: + original_scale = module.scaling["_hf_peft_original_scales"][active_adapter] + module.scaling[active_adapter] = original_scale - del module.scaling["_hf_peft_original_scales"][module.active_adapter] + del module.scaling["_hf_peft_original_scales"][active_adapter] # Clean up .. if len(module.scaling["_hf_peft_original_scales"]) == 0: From 11a493a5d63411a2893a8a07593242721b5f4469 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 09:35:07 +0000 Subject: [PATCH 010/134] simple approach --> not same results for some reason --- src/diffusers/models/lora.py | 25 ++- src/diffusers/models/unet_2d_condition.py | 12 +- .../pipeline_stable_diffusion.py | 191 +++++++++--------- src/diffusers/utils/__init__.py | 2 - src/diffusers/utils/generic.py | 36 ---- src/diffusers/utils/peft_utils.py | 24 --- 6 files changed, 116 insertions(+), 174 deletions(-) delete mode 100644 src/diffusers/utils/generic.py diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 68481dda5aae..cc8e3e231e2b 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -25,19 +25,18 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -def adjust_lora_scale_text_encoder(text_encoder, lora_scale: float = 1.0, use_peft_backend: bool = False): - if not use_peft_backend: - for _, attn_module in text_encoder_attn_modules(text_encoder): - if isinstance(attn_module.q_proj, PatchedLoraProjection): - attn_module.q_proj.lora_scale = lora_scale - attn_module.k_proj.lora_scale = lora_scale - attn_module.v_proj.lora_scale = lora_scale - attn_module.out_proj.lora_scale = lora_scale - - for _, mlp_module in text_encoder_mlp_modules(text_encoder): - if isinstance(mlp_module.fc1, PatchedLoraProjection): - mlp_module.fc1.lora_scale = lora_scale - mlp_module.fc2.lora_scale = lora_scale +def adjust_lora_scale_text_encoder(text_encoder, lora_scale: float = 1.0): + for _, attn_module in text_encoder_attn_modules(text_encoder): + if isinstance(attn_module.q_proj, PatchedLoraProjection): + attn_module.q_proj.lora_scale = lora_scale + attn_module.k_proj.lora_scale = lora_scale + attn_module.v_proj.lora_scale = lora_scale + attn_module.out_proj.lora_scale = lora_scale + + for _, mlp_module in text_encoder_mlp_modules(text_encoder): + if isinstance(mlp_module.fc1, PatchedLoraProjection): + mlp_module.fc1.lora_scale = lora_scale + mlp_module.fc2.lora_scale = lora_scale class LoRALinearLayer(nn.Module): diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py index 385f0a42c598..9a9521994197 100644 --- a/src/diffusers/models/unet_2d_condition.py +++ b/src/diffusers/models/unet_2d_condition.py @@ -20,7 +20,7 @@ from ..configuration_utils import ConfigMixin, register_to_config from ..loaders import UNet2DConditionLoadersMixin -from ..utils import BaseOutput, logging +from ..utils import BaseOutput, logging, scale_peft_layers, unscale_peft_layers from .activations import get_activation from .attention_processor import ( ADDED_KV_ATTENTION_PROCESSORS, @@ -939,7 +939,12 @@ def forward( cross_attention_kwargs["gligen"] = {"objs": self.position_net(**gligen_args)} # 3. 
down - lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0 + if not self.use_peft_backend: + lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0 + else: + # pop it to not propagate it + lora_scale = cross_attention_kwargs.pop("scale", 1.0) if cross_attention_kwargs is not None else 1.0 + scale_peft_layers(self, lora_scale) is_controlnet = mid_block_additional_residual is not None and down_block_additional_residuals is not None is_adapter = mid_block_additional_residual is None and down_block_additional_residuals is not None @@ -1039,6 +1044,9 @@ def forward( sample = self.conv_act(sample) sample = self.conv_out(sample) + if self.use_peft_backend: + unscale_peft_layers(self, lora_scale) + if not return_dict: return (sample,) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 5eafb0503b30..20f0a5864d04 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -25,7 +25,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ContextManagers, PeftLayerScaler, deprecate, logging, replace_example_docstring +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from .pipeline_output import StableDiffusionPipelineOutput @@ -296,8 +296,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -422,6 +425,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds def run_safety_checker(self, image, device, dtype): @@ -626,7 +632,6 @@ def __call__( height = height or self.unet.config.sample_size * self.vae_scale_factor width = width or self.unet.config.sample_size * self.vae_scale_factor # to deal with lora scaling and other possible forward hooks - forward_context_managers = [] # 1. Check inputs. Raise error if not correct self.check_inputs( @@ -648,104 +653,96 @@ def __call__( do_classifier_free_guidance = guidance_scale > 1.0 # 3. 
Encode input prompt - if self.use_peft_backend: - # Make sure we pop it so that it is used nowhere later - lora_scale = cross_attention_kwargs.pop("scale", None) if cross_attention_kwargs is not None else None + lora_scale = cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None + + prompt_embeds, negative_prompt_embeds = self.encode_prompt( + prompt, + device, + num_images_per_prompt, + do_classifier_free_guidance, + negative_prompt, + prompt_embeds=prompt_embeds, + negative_prompt_embeds=negative_prompt_embeds, + lora_scale=lora_scale, + clip_skip=clip_skip, + ) + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + if do_classifier_free_guidance: + prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds]) + + # 4. Prepare timesteps + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = self.scheduler.timesteps + + # 5. Prepare latent variables + num_channels_latents = self.unet.config.in_channels + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + prompt_embeds.dtype, + device, + generator, + latents, + ) - scaling_context_manager = PeftLayerScaler(self.lora_modules_to_scale, lora_scale) - forward_context_managers.append(scaling_context_manager) + # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + + # 7. Denoising loop + num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order + with self.progress_bar(total=num_inference_steps) as progress_bar: + for i, t in enumerate(timesteps): + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + noise_pred = self.unet( + latent_model_input, + t, + encoder_hidden_states=prompt_embeds, + cross_attention_kwargs=cross_attention_kwargs, + return_dict=False, + )[0] + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + if do_classifier_free_guidance and guidance_rescale > 0.0: + # Based on 3.4. 
in https://arxiv.org/pdf/2305.08891.pdf + noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] + + # call the callback, if provided + if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): + progress_bar.update() + if callback is not None and i % callback_steps == 0: + callback(i, t, latents) + + if not output_type == "latent": + image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0] + image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) else: - lora_scale = cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None - - with ContextManagers(forward_context_managers): - prompt_embeds, negative_prompt_embeds = self.encode_prompt( - prompt, - device, - num_images_per_prompt, - do_classifier_free_guidance, - negative_prompt, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - lora_scale=lora_scale, - clip_skip=clip_skip, - ) - # For classifier free guidance, we need to do two forward passes. - # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - if do_classifier_free_guidance: - prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds]) - - # 4. Prepare timesteps - self.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps = self.scheduler.timesteps - - # 5. Prepare latent variables - num_channels_latents = self.unet.config.in_channels - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - num_channels_latents, - height, - width, - prompt_embeds.dtype, - device, - generator, - latents, - ) - - # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline - extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - - # 7. Denoising loop - num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order - with self.progress_bar(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - - # predict the noise residual - noise_pred = self.unet( - latent_model_input, - t, - encoder_hidden_states=prompt_embeds, - cross_attention_kwargs=cross_attention_kwargs, - return_dict=False, - )[0] - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - - if do_classifier_free_guidance and guidance_rescale > 0.0: - # Based on 3.4. 
in https://arxiv.org/pdf/2305.08891.pdf - noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale) - - # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - progress_bar.update() - if callback is not None and i % callback_steps == 0: - callback(i, t, latents) - - if not output_type == "latent": - image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0] - image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) - else: - image = latents - has_nsfw_concept = None + image = latents + has_nsfw_concept = None - if has_nsfw_concept is None: - do_denormalize = [True] * image.shape[0] - else: - do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] + if has_nsfw_concept is None: + do_denormalize = [True] * image.shape[0] + else: + do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] - image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) + image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) - # Offload all models - self.maybe_free_model_hooks() + # Offload all models + self.maybe_free_model_hooks() if not return_dict: return (image, has_nsfw_concept) diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 3cd7c95740a0..778516aee969 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -35,7 +35,6 @@ from .doc_utils import replace_example_docstring from .dynamic_modules_utils import get_class_from_dynamic_module from .export_utils import export_to_gif, export_to_obj, export_to_ply, export_to_video -from .generic import ContextManagers from .hub_utils import ( HF_HUB_OFFLINE, PushToHubMixin, @@ -87,7 +86,6 @@ from .outputs import BaseOutput from .peft_utils import ( MIN_PEFT_VERSION, - PeftLayerScaler, check_peft_version, recurse_remove_peft_layers, scale_peft_layers, diff --git a/src/diffusers/utils/generic.py b/src/diffusers/utils/generic.py deleted file mode 100644 index 8e562b2ec919..000000000000 --- a/src/diffusers/utils/generic.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2022 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Generic utilities -""" -from contextlib import ExitStack -from typing import ContextManager, List - - -class ContextManagers: - """ - Wrapper for `contextlib.ExitStack` which enters a collection of context managers. Adaptation of `ContextManagers` - in the `fastcore` library - also exists in transformers library. 
- """ - - def __init__(self, context_managers: List[ContextManager]): - self.context_managers = context_managers - self.stack = ExitStack() - - def __enter__(self): - for context_manager in self.context_managers: - self.stack.enter_context(context_manager) - - def __exit__(self, *args, **kwargs): - self.stack.__exit__(*args, **kwargs) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index b9a238201282..68e7ee73f4c4 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -15,16 +15,12 @@ PEFT utilities: Utilities related to peft library """ import importlib -from typing import List from packaging import version from .import_utils import is_peft_available, is_torch_available -if is_torch_available(): - import torch - MIN_PEFT_VERSION = "0.5.0" @@ -161,26 +157,6 @@ def unscale_peft_layers(model, scale: float = None): del module.scaling["_hf_peft_original_scales"] -class PeftLayerScaler: - r""" - A custom context manager that scale / unscale PEFT layers before and after the forward pass. - """ - - def __init__(self, modules_to_scale: List["torch.nn.Module"], scale: float = None): - self.modules_to_scale = modules_to_scale - self.scale = scale - - def __enter__(self, *args, **kwargs): - if self.scale is not None and self.scale != 1.0: - for submodule in self.modules_to_scale: - scale_peft_layers(submodule, self.scale) - - def __exit__(self, *args, **kwargs): - if self.scale is not None and self.scale != 1.0: - for submodule in self.modules_to_scale: - unscale_peft_layers(submodule, self.scale) - - def check_peft_version(min_version: str) -> None: r""" Checks if the version of PEFT is compatible. From 4ea89593c86be8a0a76f2833a2b86b722343d7b4 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 09:57:57 +0000 Subject: [PATCH 011/134] fix issues --- src/diffusers/models/modeling_utils.py | 5 ++- src/diffusers/models/unet_2d_condition.py | 7 ++-- .../pipeline_stable_diffusion_xl.py | 33 ++++++++----------- 3 files changed, 20 insertions(+), 25 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index b766c97a6a2e..9aa323939bd3 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -360,7 +360,10 @@ def set_adapter(self, adapter_name: str) -> None: for _, module in self.named_modules(): if isinstance(module, BaseTunerLayer): - module.active_adapter = adapter_name + if hasattr(module, "set_adapter"): + module.set_adapter(adapter_name) + else: + module.active_adapter = adapter_name _adapters_has_been_set = True if not _adapters_has_been_set: diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py index 9a9521994197..0857a668dfaa 100644 --- a/src/diffusers/models/unet_2d_condition.py +++ b/src/diffusers/models/unet_2d_condition.py @@ -939,11 +939,8 @@ def forward( cross_attention_kwargs["gligen"] = {"objs": self.position_net(**gligen_args)} # 3. 
down - if not self.use_peft_backend: - lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0 - else: - # pop it to not propagate it - lora_scale = cross_attention_kwargs.pop("scale", 1.0) if cross_attention_kwargs is not None else 1.0 + lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0 + if self.use_peft_backend: scale_peft_layers(self, lora_scale) is_controlnet = mid_block_additional_residual is not None and down_block_additional_residuals is not None diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index 141c0f1b6382..79cacf6f42f6 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -265,9 +265,14 @@ def encode_prompt( if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) - adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) + else: + # dynamically adjust the LoRA scale + scale_peft_layers(self.text_encoder, lora_scale) + scale_peft_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ -404,6 +409,12 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + + if hasattr(self, "text_encoder_2"): + unscale_peft_layers(self.text_encoder_2, lora_scale) + return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs @@ -753,14 +764,6 @@ def __call__( # 3. 
Encode input prompt lora_scale = cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None - if self.use_peft_backend: - scale_peft_layers(self.text_encoder, lora_scale) - - if hasattr(self, "text_encoder_2"): - scale_peft_layers(self.text_encoder_2, lora_scale) - - scale_peft_layers(self.unet, lora_scale) - ( prompt_embeds, negative_prompt_embeds, @@ -903,14 +906,6 @@ def __call__( # Offload all models self.maybe_free_model_hooks() - if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) - - if hasattr(self, "text_encoder_2"): - unscale_peft_layers(self.text_encoder_2, lora_scale) - - unscale_peft_layers(self.unet, lora_scale) - if not return_dict: return (image,) From 16b1161231faecba5f4043debfe8e3da8f03720b Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 10:04:37 +0000 Subject: [PATCH 012/134] fix copies --- .../alt_diffusion/pipeline_alt_diffusion.py | 20 ++++++++++------ .../pipeline_alt_diffusion_img2img.py | 19 ++++++++++++--- .../controlnet/pipeline_controlnet.py | 16 +++++++------ .../controlnet/pipeline_controlnet_img2img.py | 12 ++++++++-- .../controlnet/pipeline_controlnet_inpaint.py | 16 +++++++------ .../pipeline_controlnet_inpaint_sd_xl.py | 19 ++++++++++++--- .../controlnet/pipeline_controlnet_sd_xl.py | 22 ++++++++++++------ .../pipeline_controlnet_sd_xl_img2img.py | 22 ++++++++++++------ .../pipeline_cycle_diffusion.py | 12 +++++++--- ...line_stable_diffusion_attend_and_excite.py | 12 +++++++--- .../pipeline_stable_diffusion_depth2img.py | 12 +++++++--- .../pipeline_stable_diffusion_diffedit.py | 12 ++++++++-- .../pipeline_stable_diffusion_gligen.py | 16 +++++++------ ...line_stable_diffusion_gligen_text_image.py | 15 +++++++----- .../pipeline_stable_diffusion_img2img.py | 12 ++++++++-- .../pipeline_stable_diffusion_inpaint.py | 12 +++++++--- ...ipeline_stable_diffusion_inpaint_legacy.py | 12 +++++++--- .../pipeline_stable_diffusion_k_diffusion.py | 12 +++++++--- .../pipeline_stable_diffusion_ldm3d.py | 17 +++++++------- ...pipeline_stable_diffusion_model_editing.py | 12 +++++++--- .../pipeline_stable_diffusion_panorama.py | 12 +++++++--- .../pipeline_stable_diffusion_paradigms.py | 16 +++++++------ .../pipeline_stable_diffusion_pix2pix_zero.py | 12 ++++++++-- .../pipeline_stable_diffusion_sag.py | 12 +++++++--- .../pipeline_stable_diffusion_upscale.py | 12 +++++++--- .../pipeline_stable_unclip.py | 16 +++++++------ .../pipeline_stable_unclip_img2img.py | 12 +++++++--- .../pipeline_stable_diffusion_xl_img2img.py | 19 ++++++++++++--- .../pipeline_stable_diffusion_xl_inpaint.py | 19 ++++++++++++--- .../pipeline_stable_diffusion_adapter.py | 12 ++++++++-- .../pipeline_stable_diffusion_xl_adapter.py | 23 ++++++++++++------- .../pipeline_text_to_video_synth.py | 16 +++++++------ .../pipeline_text_to_video_synth_img2img.py | 16 +++++++------ .../versatile_diffusion/modeling_text_unet.py | 7 +++++- 34 files changed, 358 insertions(+), 148 deletions(-) diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py index c5eb7dbf5330..e8219b3e3cfc 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py @@ -25,7 +25,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, 
replace_example_docstring +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -303,8 +303,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -429,6 +432,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds def run_safety_checker(self, image, device, dtype): @@ -635,6 +641,7 @@ def __call__( # 0. Default height and width to unet height = height or self.unet.config.sample_size * self.vae_scale_factor width = width or self.unet.config.sample_size * self.vae_scale_factor + # to deal with lora scaling and other possible forward hooks # 1. Check inputs. Raise error if not correct self.check_inputs( @@ -656,9 +663,8 @@ def __call__( do_classifier_free_guidance = guidance_scale > 1.0 # 3. Encode input prompt - text_encoder_lora_scale = ( - cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None - ) + lora_scale = cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None + prompt_embeds, negative_prompt_embeds = self.encode_prompt( prompt, device, @@ -667,7 +673,7 @@ def __call__( negative_prompt, prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_prompt_embeds, - lora_scale=text_encoder_lora_scale, + lora_scale=lora_scale, clip_skip=clip_skip, ) # For classifier free guidance, we need to do two forward passes. 
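With either backend the user-facing API is unchanged: the LoRA strength still travels through `cross_attention_kwargs` and is forwarded to the text encoder as `lora_scale`. A usage sketch, illustrative only (the LoRA path is a placeholder):

import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")
pipe.load_lora_weights("path/to/lora")  # placeholder LoRA checkpoint
# "scale" reaches both the text encoder and the UNet LoRA layers
image = pipe("a pokemon with blue eyes", cross_attention_kwargs={"scale": 0.7}).images[0]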
diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index f4fcf86a7f45..2ed6a6e60420 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -27,7 +27,14 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import PIL_INTERPOLATION, deprecate, logging, replace_example_docstring +from ...utils import ( + PIL_INTERPOLATION, + deprecate, + logging, + replace_example_docstring, + scale_peft_layers, + unscale_peft_layers, +) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -301,8 +308,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -427,6 +437,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds def run_safety_checker(self, image, device, dtype): diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index 221c78eab991..c29254233931 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -27,11 +27,7 @@ from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - deprecate, - logging, - replace_example_docstring, -) +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import is_compiled_module, randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput @@ -290,8 +286,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -416,6 +415,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + 
unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index 85dced1dc9c3..e535425937c6 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -30,6 +30,8 @@ deprecate, logging, replace_example_docstring, + scale_peft_layers, + unscale_peft_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -314,8 +316,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -440,6 +445,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index 2065343fe06c..62de1d14fa83 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -28,11 +28,7 @@ from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - deprecate, - logging, - replace_example_docstring, -) +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import is_compiled_module, randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion import StableDiffusionPipelineOutput @@ -441,8 +437,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -567,6 +566,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from 
diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index 739256f0c43b..41be942d33c0 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -36,6 +36,8 @@ is_invisible_watermark_available, logging, replace_example_docstring, + scale_peft_layers, + unscale_peft_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -313,9 +315,14 @@ def encode_prompt( if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) - adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) + else: + # dynamically adjust the LoRA scale + scale_peft_layers(self.text_encoder, lora_scale) + scale_peft_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ -452,6 +459,12 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + + if hasattr(self, "text_encoder_2"): + unscale_peft_layers(self.text_encoder_2, lora_scale) + return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index 8c10e6acbf94..f9d21e01fa4f 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -35,10 +35,7 @@ ) from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - logging, - replace_example_docstring, -) +from ...utils import logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import is_compiled_module, randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -287,9 +284,14 @@ def encode_prompt( if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) - adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) + else: + # dynamically adjust the LoRA scale + scale_peft_layers(self.text_encoder, lora_scale) + scale_peft_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ 
-426,6 +428,12 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + + if hasattr(self, "text_encoder_2"): + unscale_peft_layers(self.text_encoder_2, lora_scale) + return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index 92fec3a0739d..01f0c887a2a0 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -35,10 +35,7 @@ ) from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - logging, - replace_example_docstring, -) +from ...utils import logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import is_compiled_module, randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -325,9 +322,14 @@ def encode_prompt( if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) - adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) + else: + # dynamically adjust the LoRA scale + scale_peft_layers(self.text_encoder, lora_scale) + scale_peft_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ -464,6 +466,12 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + + if hasattr(self, "text_encoder_2"): + unscale_peft_layers(self.text_encoder_2, lora_scale) + return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index 6f4b32aee062..6a5b830b6e7e 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -27,7 +27,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import DDIMScheduler -from ...utils import PIL_INTERPOLATION, deprecate, logging +from ...utils import PIL_INTERPOLATION, deprecate, logging, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from .pipeline_output import StableDiffusionPipelineOutput @@ -307,8 +307,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # 
dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -433,6 +436,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.check_inputs diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py index 6cd5939d87a9..fa221b67f37d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py @@ -27,7 +27,7 @@ from ...models.attention_processor import Attention from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . import StableDiffusionPipelineOutput @@ -331,8 +331,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -457,6 +460,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index eee91028f6e8..f2cc387158d3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -28,7 +28,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import PIL_INTERPOLATION, deprecate, logging +from ...utils import PIL_INTERPOLATION, deprecate, logging, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from 
..pipeline_utils import DiffusionPipeline, ImagePipelineOutput @@ -212,8 +212,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -338,6 +341,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py index 97278d06371d..2d791c47ebda 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py @@ -34,6 +34,8 @@ deprecate, logging, replace_example_docstring, + scale_peft_layers, + unscale_peft_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -480,8 +482,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -606,6 +611,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py index 40c058e78001..51d1707f7abd 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py @@ -26,11 +26,7 @@ from ...models.attention import GatedSelfAttentionDense from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - deprecate, - logging, - replace_example_docstring, -) +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -277,8 +273,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -403,6 +402,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py index 6b9a6761bd34..9be1d7bbd8e7 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py @@ -32,10 +32,7 @@ from ...models.attention import GatedSelfAttentionDense from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - logging, - replace_example_docstring, -) +from ...utils import logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -308,8 +305,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -434,6 +434,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index d8e7ba3e5f90..3b85191e8a8e 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -32,6 +32,8 @@ deprecate, logging, replace_example_docstring, + scale_peft_layers, + unscale_peft_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -301,8 +303,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -427,6 +432,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 290e1eb6fae5..ec5bce477b45 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -27,7 +27,7 @@ from ...models import AsymmetricAutoencoderKL, AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging +from ...utils import deprecate, logging, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -374,8 +374,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -500,6 +503,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index c21ceb169bf3..9c0dc4f013f4 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -27,7 +27,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import PIL_INTERPOLATION, deprecate, logging +from ...utils import PIL_INTERPOLATION, deprecate, logging, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -296,8 +296,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -422,6 +425,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py index ff85af14fb1d..ea6f7015489f 100755 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py @@ -24,7 +24,7 @@ from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import LMSDiscreteScheduler -from ...utils import deprecate, logging +from ...utils import deprecate, logging, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -210,8 +210,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -336,6 +339,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py index eea5383f9029..f4515f76698d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py @@ -26,12 +26,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - BaseOutput, - deprecate, - logging, - replace_example_docstring, -) +from ...utils import BaseOutput, deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from .safety_checker import StableDiffusionSafetyChecker @@ -271,8 +266,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -397,6 +395,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds def run_safety_checker(self, image, device, dtype): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py index 9da9fa046bcc..6ebfb4068fbd 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py @@ -24,7 +24,7 @@ from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import PNDMScheduler from ...schedulers.scheduling_utils import SchedulerMixin -from ...utils import deprecate, logging +from ...utils import deprecate, logging, scale_peft_layers, 
unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . import StableDiffusionPipelineOutput @@ -243,8 +243,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -369,6 +372,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py index a284c6a32408..d442d17e8d23 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py @@ -23,7 +23,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import DDIMScheduler -from ...utils import deprecate, logging, replace_example_docstring +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -220,8 +220,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -346,6 +349,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py index fb65e1494757..d34ccb8e512e 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py @@ -23,11 +23,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - deprecate, - logging, - replace_example_docstring, -) +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -255,8 +251,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -381,6 +380,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py index d24db4526536..21b11e162302 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py @@ -41,6 +41,8 @@ deprecate, logging, replace_example_docstring, + scale_peft_layers, + unscale_peft_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -445,8 +447,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -571,6 +576,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py index 267fd394ce25..c288da5fd64f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py @@ -24,7 +24,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -243,8 +243,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -369,6 +372,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py index 2ea14292cccc..2643b2a09e07 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py @@ -32,7 +32,7 @@ ) from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers -from ...utils import deprecate, logging +from ...utils import deprecate, logging, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . import StableDiffusionPipelineOutput @@ -239,8 +239,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -365,6 +368,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py index 62e36652c34f..fddc2057369c 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py @@ -25,11 +25,7 @@ from ...models.embeddings import get_timestep_embedding from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - deprecate, - logging, - replace_example_docstring, -) +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import 
randn_tensor from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer @@ -345,8 +341,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -471,6 +470,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py index 6189f751514e..75fdbcbb73a9 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py @@ -25,7 +25,7 @@ from ...models.embeddings import get_timestep_embedding from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer @@ -295,8 +295,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -421,6 +424,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index e8200ce9a307..f7012df9a276 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -34,6 +34,8 @@ is_invisible_watermark_available, logging, replace_example_docstring, + scale_peft_layers, 
+ unscale_peft_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -270,9 +272,14 @@ def encode_prompt( if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) - adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) + else: + # dynamically adjust the LoRA scale + scale_peft_layers(self.text_encoder, lora_scale) + scale_peft_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ -409,6 +416,12 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + + if hasattr(self, "text_encoder_2"): + unscale_peft_layers(self.text_encoder_2, lora_scale) + return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index 49acfafd71a7..aa05ae62aed7 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -36,6 +36,8 @@ is_invisible_watermark_available, logging, replace_example_docstring, + scale_peft_layers, + unscale_peft_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -419,9 +421,14 @@ def encode_prompt( if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) - adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) + else: + # dynamically adjust the LoRA scale + scale_peft_layers(self.text_encoder, lora_scale) + scale_peft_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ -558,6 +565,12 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + + if hasattr(self, "text_encoder_2"): + unscale_peft_layers(self.text_encoder_2, lora_scale) + return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py index 76bb8e77814f..efdef5a4f4a8 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +++ 
b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py @@ -32,6 +32,8 @@ deprecate, logging, replace_example_docstring, + scale_peft_layers, + unscale_peft_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -295,8 +297,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -421,6 +426,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index d3d709e07bab..ead17918ad4f 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -31,11 +31,7 @@ ) from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - PIL_INTERPOLATION, - logging, - replace_example_docstring, -) +from ...utils import PIL_INTERPOLATION, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -286,9 +282,14 @@ def encode_prompt( if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) - adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) + else: + # dynamically adjust the LoRA scale + scale_peft_layers(self.text_encoder, lora_scale) + scale_peft_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ -425,6 +426,12 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + + if hasattr(self, "text_encoder_2"): + unscale_peft_layers(self.text_encoder_2, lora_scale) + return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py 
b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py index e59070a4122b..37056a6ef186 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py @@ -23,11 +23,7 @@ from ...models import AutoencoderKL, UNet3DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - deprecate, - logging, - replace_example_docstring, -) +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . import TextToVideoSDPipelineOutput @@ -227,8 +223,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -353,6 +352,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds def decode_latents(self, latents): diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py index 504273db86dc..1355c0e14b80 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py @@ -24,11 +24,7 @@ from ...models import AutoencoderKL, UNet3DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import ( - deprecate, - logging, - replace_example_docstring, -) +from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import TextToVideoSDPipelineOutput @@ -289,8 +285,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale - # dynamically adjust the LoRA scale - adjust_lora_scale_text_encoder(self.text_encoder, lora_scale, self.use_peft_backend) + if not self.use_peft_backend: + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + else: + scale_peft_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -415,6 +414,9 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + if self.use_peft_backend: + unscale_peft_layers(self.text_encoder, lora_scale) + return prompt_embeds, negative_prompt_embeds # Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents diff --git a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py index 99bf1d22ee91..9e5c6b56558f 100644 --- a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +++ b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py @@ -31,7 +31,7 @@ ) from ...models.transformer_2d import Transformer2DModel from ...models.unet_2d_condition import UNet2DConditionOutput -from ...utils import is_torch_version, logging +from ...utils import is_torch_version, logging, scale_peft_layers, unscale_peft_layers logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -1155,6 +1155,8 @@ def forward( # 3. down lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0 + if self.use_peft_backend: + scale_peft_layers(self, lora_scale) is_controlnet = mid_block_additional_residual is not None and down_block_additional_residuals is not None is_adapter = mid_block_additional_residual is None and down_block_additional_residuals is not None @@ -1254,6 +1256,9 @@ def forward( sample = self.conv_act(sample) sample = self.conv_out(sample) + if self.use_peft_backend: + unscale_peft_layers(self, lora_scale) + if not return_dict: return (sample,) From b3a02be81a86ce0e8de04b00f17945f070c05827 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 10:06:52 +0000 Subject: [PATCH 013/134] remove unneeded method --- src/diffusers/loaders.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 7cbc2974e676..458623a94827 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -2214,23 +2214,6 @@ def unfuse_text_encoder_lora(text_encoder): self.num_fused_loras -= 1 - @property - def lora_modules_to_scale(self): - """ - Returns the list of the LoRA modules to scale - """ - lora_modules = [] - - if hasattr(self, "text_encoder"): - lora_modules.append(self.text_encoder) - - if hasattr(self, "unet"): - lora_modules.append(self.unet) - - if hasattr(self, "text_encoder_2"): - lora_modules.append(self.text_encoder_2) - return lora_modules - class FromSingleFileMixin: """ From cc135f2d0c950e8e174c3a6f50689c59a3fc2062 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 10:09:15 +0000 Subject: [PATCH 014/134] active adapters! 
--- src/diffusers/models/modeling_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 9aa323939bd3..205ec141db97 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -407,12 +407,12 @@ def enable_adapters(self) -> None: if isinstance(module, BaseTunerLayer): module.disable_adapters = False - def active_adapter(self) -> str: + def active_adapters(self) -> List[str]: """ If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT official documentation: https://huggingface.co/docs/peft - Gets the current active adapter of the model. + Gets the current list of active adapters of the model. """ check_peft_version(min_version=MIN_PEFT_VERSION) From a09530cb4b05e2fa92a1b4475f29537145ee1f46 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 14:20:57 +0000 Subject: [PATCH 015/134] fix merge conflicts --- src/diffusers/loaders.py | 4 ++-- src/diffusers/models/lora.py | 4 ++-- src/diffusers/utils/__init__.py | 4 +++- src/diffusers/utils/peft_utils.py | 6 +++++- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 5d5c40a492f4..f65117259a94 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -42,7 +42,7 @@ is_transformers_available, logging, recurse_remove_peft_layers, - scale_lora_layers, + scale_peft_layers, set_adapter_layers, set_weights_and_activate_adapters, ) @@ -1650,7 +1650,7 @@ def load_lora_into_text_encoder( peft_config=lora_config, ) # scale LoRA layers with `lora_scale` - scale_lora_layers(text_encoder, weight=lora_scale) + scale_peft_layers(text_encoder, scale=lora_scale) is_model_cpu_offload = False is_sequential_cpu_offload = False diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index 7aae0914c458..cc8e3e231e2b 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -19,7 +19,7 @@ from torch import nn from ..loaders import PatchedLoraProjection, text_encoder_attn_modules, text_encoder_mlp_modules -from ..utils import logging, scale_lora_layers +from ..utils import logging logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -36,7 +36,7 @@ def adjust_lora_scale_text_encoder(text_encoder, lora_scale: float = 1.0): for _, mlp_module in text_encoder_mlp_modules(text_encoder): if isinstance(mlp_module.fc1, PatchedLoraProjection): mlp_module.fc1.lora_scale = lora_scale - mlp_module.fc2.lora_scale = lora_scale + mlp_module.fc2.lora_scale = lora_scale class LoRALinearLayer(nn.Module): diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index df5130eab683..72d5be805842 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -87,11 +87,13 @@ from .peft_utils import ( MIN_PEFT_VERSION, check_peft_version, + get_adapter_name, + get_peft_kwargs, recurse_remove_peft_layers, scale_peft_layers, - unscale_peft_layers, set_adapter_layers, set_weights_and_activate_adapters, + unscale_peft_layers, ) from .pil_utils import PIL_INTERPOLATION, make_image_grid, numpy_to_pil, pt_to_pil from .state_dict_utils import ( diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 90c05abc21b3..775d293af7f4 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -19,6 +19,9 @@ from .import_utils import is_torch_available 
+MIN_PEFT_VERSION = "0.5.0" + + def recurse_remove_peft_layers(model): if is_torch_available(): import torch @@ -151,6 +154,7 @@ def unscale_peft_layers(model, scale: float = None): if len(module.scaling["_hf_peft_original_scales"]) == 0: del module.scaling["_hf_peft_original_scales"] + def get_peft_kwargs(rank_dict, network_alpha_dict, peft_state_dict): rank_pattern = {} alpha_pattern = {} @@ -226,4 +230,4 @@ def set_weights_and_activate_adapters(model, adapter_names, weights): if hasattr(module, "set_adapter"): module.set_adapter(adapter_names) else: - module.active_adapter = adapter_names \ No newline at end of file + module.active_adapter = adapter_names From d3ce0927b421b1e5634e26ce05b3106f2fb369e8 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 14:23:01 +0000 Subject: [PATCH 016/134] up --- src/diffusers/utils/peft_utils.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 775d293af7f4..d234cf198882 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -231,3 +231,23 @@ def set_weights_and_activate_adapters(model, adapter_names, weights): module.set_adapter(adapter_names) else: module.active_adapter = adapter_names + + +def check_peft_version(min_version: str) -> None: + r""" + Checks if the version of PEFT is compatible. + + Args: + min_version (`str`): + The minimum version of PEFT to check against. + """ + if not is_peft_available(): + raise ValueError("PEFT is not installed. Please install it with `pip install peft`") + + is_peft_version_compatible = version.parse(importlib.metadata.version("peft")) > version.parse(min_version) + + if not is_peft_version_compatible: + raise ValueError( + f"The version of PEFT you are using is not compatible; please use a version that is greater" + f" than {min_version}" + ) \ No newline at end of file From 9e500d2c5e3a898966a4c91393f57adf32ccc6aa Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 14:25:11 +0000 Subject: [PATCH 017/134] up --- src/diffusers/utils/peft_utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index d234cf198882..7a74b7373daf 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -15,8 +15,11 @@ PEFT utilities: Utilities related to peft library """ import collections +import importlib -from .import_utils import is_torch_available +from packaging import version + +from .import_utils import is_peft_available, is_torch_available MIN_PEFT_VERSION = "0.5.0" @@ -250,4 +253,4 @@ def check_peft_version(min_version: str) -> None: raise ValueError( f"The version of PEFT you are using is not compatible; please use a version that is greater" f" than {min_version}" - ) \ No newline at end of file + ) From edaea14bb5cb6e9c5f2fdad0ac46ecd964de7084 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 15:20:03 +0000 Subject: [PATCH 018/134] kohya - test-1 --- src/diffusers/loaders.py | 3 ++- src/diffusers/utils/state_dict_utils.py | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index f65117259a94..bc6e5389ca4b 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1521,7 +1521,7 @@ def load_lora_into_unet( inject_adapter_in_model(lora_config, unet, adapter_name=adapter_name) - incompatible_keys = set_peft_model_state_dict(unet, state_dict)
incompatible_keys = set_peft_model_state_dict(unet, state_dict, adapter_name) if incompatible_keys is not None: # check only for unexpected keys @@ -1609,6 +1609,7 @@ def load_lora_into_text_encoder( for name, _ in text_encoder_mlp_modules(text_encoder): rank_key_fc1 = f"{name}.fc1.lora_B.weight" rank_key_fc2 = f"{name}.fc2.lora_B.weight" + rank[rank_key_fc1] = text_encoder_lora_state_dict[rank_key_fc1].shape[1] rank[rank_key_fc2] = text_encoder_lora_state_dict[rank_key_fc2].shape[1] else: diff --git a/src/diffusers/utils/state_dict_utils.py b/src/diffusers/utils/state_dict_utils.py index 65f3f90c5ff4..aada09a6be7f 100644 --- a/src/diffusers/utils/state_dict_utils.py +++ b/src/diffusers/utils/state_dict_utils.py @@ -39,6 +39,8 @@ class StateDictType(enum.Enum): ".to_k_lora.up": ".to_k.lora_B", ".to_v_lora.down": ".to_v.lora_A", ".to_v_lora.up": ".to_v.lora_B", + ".lora.up": ".lora_B", + ".lora.down": ".lora_A", ".processor.": ".", } @@ -52,6 +54,8 @@ class StateDictType(enum.Enum): ".v_proj.lora_linear_layer.down": ".v_proj.lora_A", ".out_proj.lora_linear_layer.up": ".out_proj.lora_B", ".out_proj.lora_linear_layer.down": ".out_proj.lora_A", + ".lora_linear_layer.up": ".lora_B", + ".lora_linear_layer.down": ".lora_A", } DIFFUSERS_OLD_TO_PEFT = { @@ -63,6 +67,8 @@ class StateDictType(enum.Enum): ".to_v_lora.down": ".v_proj.lora_A", ".to_out_lora.up": ".out_proj.lora_B", ".to_out_lora.down": ".out_proj.lora_A", + ".lora_linear_layer.up": ".lora_B", + ".lora_linear_layer.down": ".lora_A", } PEFT_TO_DIFFUSERS = { From 878150605aa0ac81c779c2844deedbfb1ab462d7 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Wed, 27 Sep 2023 17:35:15 +0200 Subject: [PATCH 019/134] Apply suggestions from code review Co-authored-by: Patrick von Platen --- src/diffusers/models/unet_2d_condition.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py index 0857a668dfaa..f4a8e575c438 100644 --- a/src/diffusers/models/unet_2d_condition.py +++ b/src/diffusers/models/unet_2d_condition.py @@ -941,6 +941,7 @@ def forward( # 3. 
down lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0 if self.use_peft_backend: + # weight the lora layers by setting `lora_scale` for each PEFT layer scale_peft_layers(self, lora_scale) is_controlnet = mid_block_additional_residual is not None and down_block_additional_residuals is not None @@ -1042,6 +1043,7 @@ def forward( sample = self.conv_out(sample) if self.use_peft_backend: + # remove `lora_scale` from each PEFT layer unscale_peft_layers(self, lora_scale) if not return_dict: From 0a1457396c17b2836f5aa22f64c45cae86bcdd3b Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 16:52:06 +0000 Subject: [PATCH 020/134] fix scale --- src/diffusers/loaders.py | 4 +- src/diffusers/models/unet_2d_condition.py | 6 +- .../alt_diffusion/pipeline_alt_diffusion.py | 9 ++- .../pipeline_alt_diffusion_img2img.py | 11 +-- .../controlnet/pipeline_controlnet.py | 9 ++- .../controlnet/pipeline_controlnet_img2img.py | 11 +-- .../controlnet/pipeline_controlnet_inpaint.py | 9 ++- .../pipeline_controlnet_inpaint_sd_xl.py | 17 ++-- .../controlnet/pipeline_controlnet_sd_xl.py | 15 ++-- .../pipeline_controlnet_sd_xl_img2img.py | 15 ++-- .../pipeline_cycle_diffusion.py | 9 ++- .../pipeline_stable_diffusion.py | 9 ++- ...line_stable_diffusion_attend_and_excite.py | 9 ++- .../pipeline_stable_diffusion_depth2img.py | 9 ++- .../pipeline_stable_diffusion_diffedit.py | 11 +-- .../pipeline_stable_diffusion_gligen.py | 9 ++- ...line_stable_diffusion_gligen_text_image.py | 9 ++- .../pipeline_stable_diffusion_img2img.py | 11 +-- .../pipeline_stable_diffusion_inpaint.py | 9 ++- ...ipeline_stable_diffusion_inpaint_legacy.py | 9 ++- .../pipeline_stable_diffusion_k_diffusion.py | 9 ++- .../pipeline_stable_diffusion_ldm3d.py | 9 ++- ...pipeline_stable_diffusion_model_editing.py | 9 ++- .../pipeline_stable_diffusion_panorama.py | 9 ++- .../pipeline_stable_diffusion_paradigms.py | 9 ++- .../pipeline_stable_diffusion_pix2pix_zero.py | 11 +-- .../pipeline_stable_diffusion_sag.py | 9 ++- .../pipeline_stable_diffusion_upscale.py | 9 ++- .../pipeline_stable_unclip.py | 9 ++- .../pipeline_stable_unclip_img2img.py | 9 ++- .../pipeline_stable_diffusion_xl.py | 17 ++-- .../pipeline_stable_diffusion_xl_img2img.py | 17 ++-- .../pipeline_stable_diffusion_xl_inpaint.py | 17 ++-- .../pipeline_stable_diffusion_adapter.py | 11 +-- .../pipeline_stable_diffusion_xl_adapter.py | 15 ++-- .../pipeline_text_to_video_synth.py | 9 ++- .../pipeline_text_to_video_synth_img2img.py | 9 ++- .../versatile_diffusion/modeling_text_unet.py | 8 +- src/diffusers/utils/__init__.py | 4 +- src/diffusers/utils/peft_utils.py | 80 ++++--------------- 40 files changed, 221 insertions(+), 258 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index bc6e5389ca4b..0053f4812a3f 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -42,7 +42,7 @@ is_transformers_available, logging, recurse_remove_peft_layers, - scale_peft_layers, + scale_lora_layers, set_adapter_layers, set_weights_and_activate_adapters, ) @@ -1651,7 +1651,7 @@ def load_lora_into_text_encoder( peft_config=lora_config, ) # scale LoRA layers with `lora_scale` - scale_peft_layers(text_encoder, scale=lora_scale) + scale_lora_layers(text_encoder, weight=lora_scale) is_model_cpu_offload = False is_sequential_cpu_offload = False diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py index f4a8e575c438..8959331a0dc0 100644 --- 
a/src/diffusers/models/unet_2d_condition.py +++ b/src/diffusers/models/unet_2d_condition.py @@ -20,7 +20,7 @@ from ..configuration_utils import ConfigMixin, register_to_config from ..loaders import UNet2DConditionLoadersMixin -from ..utils import BaseOutput, logging, scale_peft_layers, unscale_peft_layers +from ..utils import BaseOutput, logging, scale_lora_layers, unscale_lora_layers from .activations import get_activation from .attention_processor import ( ADDED_KV_ATTENTION_PROCESSORS, @@ -942,7 +942,7 @@ def forward( lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0 if self.use_peft_backend: # weight the lora layers by setting `lora_scale` for each PEFT layer - scale_peft_layers(self, lora_scale) + scale_lora_layers(self, lora_scale) is_controlnet = mid_block_additional_residual is not None and down_block_additional_residuals is not None is_adapter = mid_block_additional_residual is None and down_block_additional_residuals is not None @@ -1044,7 +1044,7 @@ def forward( if self.use_peft_backend: # remove `lora_scale` from each PEFT layer - unscale_peft_layers(self, lora_scale) + unscale_lora_layers(self) if not return_dict: return (sample,) diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py index e8219b3e3cfc..45cbaf6a1e19 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py @@ -25,7 +25,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -303,11 +303,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -433,7 +433,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index 80978d052c55..d056a2f300bb 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -32,8 +32,8 @@ deprecate, logging, replace_example_docstring, - scale_peft_layers, - unscale_peft_layers, + scale_lora_layers, + unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import 
DiffusionPipeline @@ -308,11 +308,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -438,7 +438,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index c29254233931..bfc8fd3a3c38 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -27,7 +27,7 @@ from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import is_compiled_module, randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput @@ -286,11 +286,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -416,7 +416,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index e535425937c6..0a1daf64e544 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -30,8 +30,8 @@ deprecate, logging, replace_example_docstring, - scale_peft_layers, - unscale_peft_layers, + scale_lora_layers, + unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -316,11 +316,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - 
scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -446,7 +446,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index 62de1d14fa83..6b3ccbce43a5 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -28,7 +28,7 @@ from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import is_compiled_module, randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion import StableDiffusionPipelineOutput @@ -437,11 +437,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -567,7 +567,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index bb08b6e6f1ab..c038c818d281 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -36,8 +36,8 @@ is_invisible_watermark_available, logging, replace_example_docstring, - scale_peft_layers, - unscale_peft_layers, + scale_lora_layers, + unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -316,13 +316,11 @@ def encode_prompt( self._lora_scale = lora_scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) else: - # dynamically adjust the LoRA scale - scale_peft_layers(self.text_encoder, lora_scale) - scale_peft_layers(self.text_encoder_2, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ -460,10 +458,9 @@ def 
encode_prompt( ) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) - - if hasattr(self, "text_encoder_2"): - unscale_peft_layers(self.text_encoder_2, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder_2) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index 60adf5ca5e5a..5ab7e979335e 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -35,7 +35,7 @@ ) from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import is_compiled_module, randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -285,13 +285,11 @@ def encode_prompt( self._lora_scale = lora_scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) else: - # dynamically adjust the LoRA scale - scale_peft_layers(self.text_encoder, lora_scale) - scale_peft_layers(self.text_encoder_2, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ -429,10 +427,9 @@ def encode_prompt( ) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) - - if hasattr(self, "text_encoder_2"): - unscale_peft_layers(self.text_encoder_2, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder_2) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index 83aee081d2f9..c1b038ec2eee 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -35,7 +35,7 @@ ) from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import is_compiled_module, randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -323,13 +323,11 @@ def encode_prompt( self._lora_scale = lora_scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) else: - # dynamically adjust the LoRA scale - scale_peft_layers(self.text_encoder, lora_scale) - 
scale_peft_layers(self.text_encoder_2, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ -467,10 +465,9 @@ def encode_prompt( ) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) - - if hasattr(self, "text_encoder_2"): - unscale_peft_layers(self.text_encoder_2, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder_2) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index e9786578f453..27384a6728ab 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -27,7 +27,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import DDIMScheduler -from ...utils import PIL_INTERPOLATION, deprecate, logging, scale_peft_layers, unscale_peft_layers +from ...utils import PIL_INTERPOLATION, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from .pipeline_output import StableDiffusionPipelineOutput @@ -307,11 +307,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -437,7 +437,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 20f0a5864d04..148f70148995 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -25,7 +25,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from .pipeline_output import StableDiffusionPipelineOutput @@ -296,11 +296,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale 
adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -426,7 +426,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py index fa221b67f37d..f556f569ff15 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py @@ -27,7 +27,7 @@ from ...models.attention_processor import Attention from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . import StableDiffusionPipelineOutput @@ -331,11 +331,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -461,7 +461,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index 41d548058b04..e56653a4da8b 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -28,7 +28,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import PIL_INTERPOLATION, deprecate, logging, scale_peft_layers, unscale_peft_layers +from ...utils import PIL_INTERPOLATION, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput @@ -212,11 +212,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale 
adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -342,7 +342,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py index fb191a10479b..4a157b382063 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py @@ -34,8 +34,8 @@ deprecate, logging, replace_example_docstring, - scale_peft_layers, - unscale_peft_layers, + scale_lora_layers, + unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -482,11 +482,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -612,7 +612,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py index 3798384a3e4a..bb98e727ed2a 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py @@ -26,7 +26,7 @@ from ...models.attention import GatedSelfAttentionDense from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -273,11 +273,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -403,7 +403,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py index 660be91ed47a..cdb9ae89be2c 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py @@ -32,7 +32,7 @@ from ...models.attention import GatedSelfAttentionDense from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -305,11 +305,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -435,7 +435,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index b229c696c927..6ced1eda4b38 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -32,8 +32,8 @@ deprecate, logging, replace_example_docstring, - scale_peft_layers, - unscale_peft_layers, + scale_lora_layers, + unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -303,11 +303,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -433,7 +433,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 6985728b592b..72ffd3c6f669 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -27,7 +27,7 @@ from ...models import AsymmetricAutoencoderKL, AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -374,11 +374,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -504,7 +504,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index ca947f3cfe18..75d0b465888b 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -27,7 +27,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import PIL_INTERPOLATION, deprecate, logging, scale_peft_layers, unscale_peft_layers +from ...utils import PIL_INTERPOLATION, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . import StableDiffusionPipelineOutput @@ -296,11 +296,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -426,7 +426,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py index ea6f7015489f..5c50ad56474d 100755 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py @@ -24,7 +24,7 @@ from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import LMSDiscreteScheduler -from ...utils import deprecate, logging, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -210,11 +210,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -340,7 +340,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py index e74f1aa8585b..41fb96c26bd1 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py @@ -26,7 +26,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import BaseOutput, deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import BaseOutput, deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from .safety_checker import StableDiffusionSafetyChecker @@ -266,11 +266,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -396,7 +396,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py index 6ebfb4068fbd..3ebb6000aa6f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py @@ -24,7 +24,7 @@ from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import PNDMScheduler from ...schedulers.scheduling_utils import SchedulerMixin -from ...utils import deprecate, logging, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -243,11 +243,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -373,7 +373,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py index d442d17e8d23..a18cc88dd491 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py @@ -23,7 +23,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import DDIMScheduler -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . import StableDiffusionPipelineOutput @@ -220,11 +220,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -350,7 +350,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py index d34ccb8e512e..82e9f97a1734 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py @@ -23,7 +23,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -251,11 +251,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -381,7 +381,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py index fadb3fb6b0e8..33592d87a9cb 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py @@ -41,8 +41,8 @@ deprecate, logging, replace_example_docstring, - scale_peft_layers, - unscale_peft_layers, + scale_lora_layers, + unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -447,11 +447,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -577,7 +577,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py index c288da5fd64f..68f3740c7b9f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py @@ -24,7 +24,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import StableDiffusionPipelineOutput @@ -243,11 +243,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -373,7 +373,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py index 30436083b6b3..036255fe3b4f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py @@ -32,7 +32,7 @@ ) from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers -from ...utils import deprecate, logging, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . import StableDiffusionPipelineOutput @@ -239,11 +239,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -369,7 +369,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py index fddc2057369c..dc42fbe78a15 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py @@ -25,7 +25,7 @@ from ...models.embeddings import get_timestep_embedding from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer @@ -341,11 +341,11 @@ def encode_prompt( if lora_scale is not None and 
isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -471,7 +471,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py index 0aede914b42e..12b64d32852e 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py @@ -25,7 +25,7 @@ from ...models.embeddings import get_timestep_embedding from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer @@ -295,11 +295,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -425,7 +425,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index fdbb0fdb1794..36c1bbe79fe5 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -37,8 +37,8 @@ is_invisible_watermark_available, logging, replace_example_docstring, - scale_peft_layers, - unscale_peft_layers, + scale_lora_layers, + unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -266,13 +266,11 @@ def encode_prompt( self._lora_scale = lora_scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) else: - # dynamically adjust the LoRA scale - 
scale_peft_layers(self.text_encoder, lora_scale) - scale_peft_layers(self.text_encoder_2, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ -410,10 +408,9 @@ def encode_prompt( ) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) - - if hasattr(self, "text_encoder_2"): - unscale_peft_layers(self.text_encoder_2, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder_2) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index dfaaf33a31b4..2d843c0dfa43 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -34,8 +34,8 @@ is_invisible_watermark_available, logging, replace_example_docstring, - scale_peft_layers, - unscale_peft_layers, + scale_lora_layers, + unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -273,13 +273,11 @@ def encode_prompt( self._lora_scale = lora_scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) else: - # dynamically adjust the LoRA scale - scale_peft_layers(self.text_encoder, lora_scale) - scale_peft_layers(self.text_encoder_2, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ -417,10 +415,9 @@ def encode_prompt( ) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) - - if hasattr(self, "text_encoder_2"): - unscale_peft_layers(self.text_encoder_2, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder_2) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index bd607d76b538..d200bf7aacea 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -36,8 +36,8 @@ is_invisible_watermark_available, logging, replace_example_docstring, - scale_peft_layers, - unscale_peft_layers, + scale_lora_layers, + unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -422,13 +422,11 @@ def encode_prompt( self._lora_scale = lora_scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) else: - # dynamically adjust the LoRA scale - scale_peft_layers(self.text_encoder, lora_scale) - scale_peft_layers(self.text_encoder_2, lora_scale) + 
scale_lora_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else prompt @@ -566,10 +564,9 @@ def encode_prompt( ) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) - - if hasattr(self, "text_encoder_2"): - unscale_peft_layers(self.text_encoder_2, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder_2) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py index 810ba651be34..a9d80805aa56 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py @@ -32,8 +32,8 @@ deprecate, logging, replace_example_docstring, - scale_peft_layers, - unscale_peft_layers, + scale_lora_layers, + unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline @@ -297,11 +297,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -427,7 +427,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 0b0a437f605a..11c31e027e3e 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -31,7 +31,7 @@ ) from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import PIL_INTERPOLATION, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import PIL_INTERPOLATION, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -283,13 +283,11 @@ def encode_prompt( self._lora_scale = lora_scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale) else: - # dynamically adjust the LoRA scale - scale_peft_layers(self.text_encoder, lora_scale) - scale_peft_layers(self.text_encoder_2, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder_2, lora_scale) prompt = [prompt] if isinstance(prompt, str) else 
prompt @@ -427,10 +425,9 @@ def encode_prompt( ) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) - - if hasattr(self, "text_encoder_2"): - unscale_peft_layers(self.text_encoder_2, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder_2) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py index 37056a6ef186..bd32b0f6b505 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py @@ -23,7 +23,7 @@ from ...models import AutoencoderKL, UNet3DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . import TextToVideoSDPipelineOutput @@ -223,11 +223,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -353,7 +353,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py index 028fc59f1107..d14d6e46cd0f 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py @@ -24,7 +24,7 @@ from ...models import AutoencoderKL, UNet3DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers -from ...utils import deprecate, logging, replace_example_docstring, scale_peft_layers, unscale_peft_layers +from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from . 
import TextToVideoSDPipelineOutput @@ -285,11 +285,11 @@ def encode_prompt( if lora_scale is not None and isinstance(self, LoraLoaderMixin): self._lora_scale = lora_scale + # dynamically adjust the LoRA scale if not self.use_peft_backend: - # dynamically adjust the LoRA scale adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) else: - scale_peft_layers(self.text_encoder, lora_scale) + scale_lora_layers(self.text_encoder, lora_scale) if prompt is not None and isinstance(prompt, str): batch_size = 1 @@ -415,7 +415,8 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) if self.use_peft_backend: - unscale_peft_layers(self.text_encoder, lora_scale) + # Retrieve the original scale by scaling back the LoRA layers + unscale_lora_layers(self.text_encoder) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py index 9e5c6b56558f..0d94916d5bf9 100644 --- a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +++ b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py @@ -31,7 +31,7 @@ ) from ...models.transformer_2d import Transformer2DModel from ...models.unet_2d_condition import UNet2DConditionOutput -from ...utils import is_torch_version, logging, scale_peft_layers, unscale_peft_layers +from ...utils import is_torch_version, logging, scale_lora_layers, unscale_lora_layers logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -1156,7 +1156,8 @@ def forward( # 3. down lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0 if self.use_peft_backend: - scale_peft_layers(self, lora_scale) + # weight the lora layers by setting `lora_scale` for each PEFT layer + scale_lora_layers(self, lora_scale) is_controlnet = mid_block_additional_residual is not None and down_block_additional_residuals is not None is_adapter = mid_block_additional_residual is None and down_block_additional_residuals is not None @@ -1257,7 +1258,8 @@ def forward( sample = self.conv_out(sample) if self.use_peft_backend: - unscale_peft_layers(self, lora_scale) + # remove `lora_scale` from each PEFT layer + unscale_lora_layers(self, lora_scale) if not return_dict: return (sample,) diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 72d5be805842..ada3eea302da 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -90,10 +90,10 @@ get_adapter_name, get_peft_kwargs, recurse_remove_peft_layers, - scale_peft_layers, + scale_lora_layers, set_adapter_layers, set_weights_and_activate_adapters, - unscale_peft_layers, + unscale_lora_layers, ) from .pil_utils import PIL_INTERPOLATION, make_image_grid, numpy_to_pil, pt_to_pil from .state_dict_utils import ( diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 7a74b7373daf..3b1a9b719cd0 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -78,84 +78,38 @@ def recurse_remove_peft_layers(model): return model -def scale_peft_layers(model, scale: float = None): - r""" - Scale peft layers - Loops over the modules of the model and scale the layers that are of type `BaseTunerLayer`. We - also store the original scale factor in case we multiply it by zero. +def scale_lora_layers(model, weight): + """ + Adjust the weightage given to the LoRA layers of the model. 
Args: model (`torch.nn.Module`): The model to scale. - scale (`float`, *optional*): - The scale factor to use. + weight (`float`): + The weight to be given to the LoRA layers. """ from peft.tuners.tuners_utils import BaseTunerLayer - if scale is not None and scale != 1.0: - for module in model.modules(): - if isinstance(module, BaseTunerLayer): - # To deal with previous PEFT versions - active_adapters = module.active_adapter - if isinstance(active_adapters, str): - active_adapters = [active_adapters] - - for active_adapter in active_adapters: - original_scale = module.scaling[active_adapter] - - # Store the previous scale in case we multiply it by zero - if "_hf_peft_original_scales" not in module.scaling: - module.scaling["_hf_peft_original_scales"] = {active_adapter: original_scale} - else: - module.scaling["_hf_peft_original_scales"][active_adapter] = original_scale - - module.scaling[active_adapter] *= scale + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + module.scale_layer(weight) -def unscale_peft_layers(model, scale: float = None): - r""" - Un-scale peft layers - in case the modules has been zero-ed by a zero factor we retrieve the previous scale and - restore it. Otherwise, assuming the user uses the same scale factor, we just divide by the scale factor. +def unscale_lora_layers(model): + """ + Adjust back the weightage given to the LoRA layers of the model. Args: model (`torch.nn.Module`): - The model to unscale. - scale (`float`, *optional*): - The scale factor to use. If 0.0 is passed, we retrieve the original scale factor. In order to retrieve the - original factor the user needs first to call `scale_peft_layers` with the same scale factor. + The model to unscale. """ from peft.tuners.tuners_utils import BaseTunerLayer - if scale is not None and scale != 1.0 and scale != 0.0: - for module in model.modules(): - if isinstance(module, BaseTunerLayer): - # To deal with previous PEFT versions - active_adapters = module.active_adapter - if isinstance(active_adapters, str): - active_adapters = [active_adapters] - - for active_adapter in active_adapters: - module.scaling[active_adapter] /= scale - elif scale is not None and scale == 0.0: - for module in model.modules(): - if isinstance(module, BaseTunerLayer): - if "_hf_peft_original_scales" not in module.scaling: - raise ValueError( - "The layer has not been scaled, cannot unscale it - please call first `scale_peft_layers`" - ) - # To deal with previous PEFT versions - active_adapters = module.active_adapter - if isinstance(active_adapters, str): - active_adapters = [active_adapters] - - for active_adapter in active_adapters: - original_scale = module.scaling["_hf_peft_original_scales"][active_adapter] - module.scaling[active_adapter] = original_scale - - del module.scaling["_hf_peft_original_scales"][active_adapter] - - # Clean up ..
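# A minimal usage sketch of the two rewritten helpers (illustrative only, not part of
# this patch; `text_encoder` and `input_ids` are assumed to exist with PEFT LoRA
# layers already injected). PEFT's `scale_layer(w)` multiplies the active adapter's
# contribution by `w`, and `unscale_layer()` restores the default scaling:
from diffusers.utils import scale_lora_layers, unscale_lora_layers

scale_lora_layers(text_encoder, weight=0.5)  # halve the LoRA contribution
prompt_embeds = text_encoder(input_ids)[0]   # forward pass sees the scaled adapter
unscale_lora_layers(text_encoder)            # restore the stock adapter scaling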
- if len(module.scaling["_hf_peft_original_scales"]) == 0: - del module.scaling["_hf_peft_original_scales"] + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + module.unscale_layer() def get_peft_kwargs(rank_dict, network_alpha_dict, peft_state_dict): From c26c4189d1e498e00c4a9ccddd466e9e5b0150ff Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 16:56:03 +0000 Subject: [PATCH 021/134] fix copies --- .../pipelines/versatile_diffusion/modeling_text_unet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py index 0d94916d5bf9..3db8f39783f4 100644 --- a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +++ b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py @@ -1259,7 +1259,7 @@ def forward( if self.use_peft_backend: # remove `lora_scale` from each PEFT layer - unscale_lora_layers(self, lora_scale) + unscale_lora_layers(self) if not return_dict: return (sample,) From 6996b829f7eb3453e73a97d8ddf122ca1a31b3e3 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 27 Sep 2023 17:01:23 +0000 Subject: [PATCH 022/134] add comment --- src/diffusers/loaders.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 0053f4812a3f..46f81602c063 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -506,6 +506,8 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict ) attn_processors[key].load_state_dict(value_dict) elif self.use_peft_backend: + # In that case we have nothing to do as loading the adapter weights is already handled above by `set_peft_model_state_dict` + # on the Unet pass else: raise ValueError( From 68912e407541ac94c00eb0bd6ca2d19ecac5b10a Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 28 Sep 2023 09:36:51 +0000 Subject: [PATCH 023/134] multi adapters --- src/diffusers/models/modeling_utils.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 205ec141db97..4a3cbecbd22c 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -334,7 +334,7 @@ def add_adapter(self, adapter_config, adapter_name: Optional[str] = None) -> Non inject_adapter_in_model(adapter_config, self, adapter_name) self.set_adapter(adapter_name) - def set_adapter(self, adapter_name: str) -> None: + def set_adapter(self, adapter_name: Union[str, List[str]]) -> None: """ If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT official documentation: https://huggingface.co/docs/peft Sets a specific adapter by forcing the model to use that adapter and disabling the other adapters. Args: - adapter_name (`str`): - The name of the adapter to set. + adapter_name (`Union[str, List[str]]`): + The list of adapters to set, or the adapter name in the case of a single adapter. """ check_peft_version(min_version=MIN_PEFT_VERSION) if not self._hf_peft_config_loaded: raise ValueError("No adapter loaded. Please load an adapter first.") + elif isinstance(adapter_name, list): + missing = set(adapter_name) - set(self.peft_config) + if len(missing) > 0: + raise ValueError( + f"Following adapter(s) could not be found: {', '.join(missing)}. Make sure you are passing the correct adapter name(s)." + f" current loaded adapters are: {list(self.peft_config.keys())}" + )
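# A quick sketch of what the widened `set_adapter` signature enables (the `unet`
# instance and adapter names here are illustrative assumptions, not part of the patch):
unet.set_adapter(["style", "subject"])  # activate several loaded adapters at once
unet.set_adapter("style")               # or a single one, as before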
elif adapter_name not in self.peft_config: raise ValueError( f"Adapter with name {adapter_name} not found. Please pass the correct adapter name among {list(self.peft_config.keys())}" ) @@ -362,6 +369,12 @@ def set_adapter(self, adapter_name: str) -> None: if isinstance(module, BaseTunerLayer): if hasattr(module, "set_adapter"): module.set_adapter(adapter_name) + # Previous versions of PEFT do not support multi-adapter inference + elif not hasattr(module, "set_adapter") and isinstance(adapter_name, list): + raise ValueError( + "You are trying to set multiple adapters and you have a PEFT version that does not support multi-adapter inference. Please upgrade to the latest version of PEFT." + " `pip install -U peft` or `pip install -U git+https://github.com/huggingface/peft.git`" + ) else: module.active_adapter = adapter_name _adapters_has_been_set = True From 10e0e6146c017c7d968ef1d7787e1bedf43fc35b Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 28 Sep 2023 15:55:15 +0000 Subject: [PATCH 024/134] fix tests --- src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py | 2 +- .../pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py | 2 +- src/diffusers/pipelines/controlnet/pipeline_controlnet.py | 2 +- .../pipelines/controlnet/pipeline_controlnet_img2img.py | 2 +- .../pipelines/controlnet/pipeline_controlnet_inpaint.py | 2 +- .../pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py | 2 +- src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py | 2 +- .../pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py | 2 +- .../pipelines/stable_diffusion/pipeline_cycle_diffusion.py | 2 +- .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 2 +- .../pipeline_stable_diffusion_attend_and_excite.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_depth2img.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_diffedit.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_gligen.py | 2 +- .../pipeline_stable_diffusion_gligen_text_image.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_inpaint.py | 2 +- .../pipeline_stable_diffusion_inpaint_legacy.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_k_diffusion.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_ldm3d.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_model_editing.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_panorama.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_paradigms.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py | 2 +- .../pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_upscale.py | 2 +- .../pipelines/stable_diffusion/pipeline_stable_unclip.py | 2 +- .../stable_diffusion/pipeline_stable_unclip_img2img.py | 2 +- .../stable_diffusion_xl/pipeline_stable_diffusion_xl.py | 2 +- .../stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py | 2 +- .../stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py | 2 +- .../pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py | 2 +- .../t2i_adapter/pipeline_stable_diffusion_xl_adapter.py | 2 +- .../text_to_video_synthesis/pipeline_text_to_video_synth.py | 2 +- .../pipeline_text_to_video_synth_img2img.py | 2 +- 35 files changed, 35 insertions(+), 35 deletions(-) diff --git
a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py index 45cbaf6a1e19..8e6d99ff4d3b 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py @@ -432,7 +432,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index d056a2f300bb..562bb5f59c56 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -437,7 +437,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index bfc8fd3a3c38..6ffaac6800b4 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -415,7 +415,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index 0a1daf64e544..9c8d6e753693 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -445,7 +445,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index 6b3ccbce43a5..bca8bebcd60a 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -566,7 +566,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if 
self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index c038c818d281..9d1f81f77fc8 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -457,7 +457,7 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) - if self.use_peft_backend: + if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) unscale_lora_layers(self.text_encoder_2) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index 5ab7e979335e..96353ed349f2 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -426,7 +426,7 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) - if self.use_peft_backend: + if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) unscale_lora_layers(self.text_encoder_2) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index c1b038ec2eee..40125bfcd0c4 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -464,7 +464,7 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) - if self.use_peft_backend: + if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) unscale_lora_layers(self.text_encoder_2) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index 27384a6728ab..1752729e0992 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -436,7 +436,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 148f70148995..da3834bf82f4 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -425,7 +425,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - 
if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py index f556f569ff15..7f09545bde88 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py @@ -460,7 +460,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index e56653a4da8b..e5b334914f02 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -341,7 +341,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py index 4a157b382063..3e328da0939c 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py @@ -611,7 +611,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py index bb98e727ed2a..79dadb6fb568 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py @@ -402,7 +402,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py 
b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py index cdb9ae89be2c..fd8fe4775386 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py @@ -434,7 +434,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 6ced1eda4b38..2463a99f6ec5 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -432,7 +432,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 72ffd3c6f669..23f6935d8f8d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -503,7 +503,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 75d0b465888b..6dd7db93b9fc 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -425,7 +425,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py index 5c50ad56474d..a5c447792ff5 100755 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +++ 
b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py @@ -339,7 +339,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py index 41fb96c26bd1..343fc7e5e12f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py @@ -395,7 +395,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py index 3ebb6000aa6f..8e086541a1ad 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py @@ -372,7 +372,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py index a18cc88dd491..f544020ce012 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py @@ -349,7 +349,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py index 82e9f97a1734..4ad622bbc62b 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py @@ -380,7 +380,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if 
self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py index 33592d87a9cb..2250dfc93b72 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py @@ -576,7 +576,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py index 68f3740c7b9f..dc59faeabdc3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py @@ -372,7 +372,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py index 036255fe3b4f..2c637e5142a4 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py @@ -368,7 +368,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py index dc42fbe78a15..7bea2411c698 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py @@ -470,7 +470,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py index 12b64d32852e..7710105b46d7 100644 --- 
a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py @@ -424,7 +424,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index 36c1bbe79fe5..ecf9136f30ea 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -407,7 +407,7 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) - if self.use_peft_backend: + if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) unscale_lora_layers(self.text_encoder_2) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index 2d843c0dfa43..d95df83521ce 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -414,7 +414,7 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) - if self.use_peft_backend: + if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) unscale_lora_layers(self.text_encoder_2) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index d200bf7aacea..1059be6303b6 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -563,7 +563,7 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) - if self.use_peft_backend: + if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) unscale_lora_layers(self.text_encoder_2) diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py index a9d80805aa56..7c1020792fea 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py @@ -426,7 +426,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git 
a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 11c31e027e3e..fdcb4d821f4b 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -424,7 +424,7 @@ def encode_prompt( bs_embed * num_images_per_prompt, -1 ) - if self.use_peft_backend: + if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) unscale_lora_layers(self.text_encoder_2) diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py index bd32b0f6b505..0445d600199f 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py @@ -352,7 +352,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py index d14d6e46cd0f..b6c35363de23 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py @@ -414,7 +414,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) From 99fec5776018462b30f11c501a1a2aa160dc4e42 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 3 Oct 2023 16:31:26 +0000 Subject: [PATCH 025/134] oops --- src/diffusers/loaders.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 13e2b998674d..dc8d89b68435 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -23,10 +23,11 @@ import safetensors import torch from huggingface_hub import hf_hub_download, model_info +from packaging import version from torch import nn +from . 
import __version__ from .models import USE_PEFT_BACKEND - from .models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta from .utils import ( DIFFUSERS_CACHE, From ac925f81321e95fc8168184c3346bf3d75404d5a Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 3 Oct 2023 18:32:39 +0000 Subject: [PATCH 026/134] v1 faster loading - in progress --- src/diffusers/loaders.py | 49 ++++++++++++++++++++++++++----- src/diffusers/utils/__init__.py | 1 + src/diffusers/utils/peft_utils.py | 37 +++++++++++++++++++++++ 3 files changed, 79 insertions(+), 8 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index dc8d89b68435..f165c07102a5 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -47,6 +47,7 @@ scale_lora_layers, set_adapter_layers, set_weights_and_activate_adapters, + transform_state_dict_to_peft, ) from .utils.import_utils import BACKENDS_MAPPING @@ -1523,9 +1524,26 @@ def load_lora_into_unet( target_modules=target_modules, ) - inject_adapter_in_model(lora_config, unet, adapter_name=adapter_name) + ctx = nullcontext if not low_cpu_mem_usage else init_empty_weights - incompatible_keys = set_peft_model_state_dict(unet, state_dict, adapter_name) + with ctx(): + inject_adapter_in_model(lora_config, unet, adapter_name=adapter_name) + + if low_cpu_mem_usage: + device = next(iter(state_dict.values())).device + dtype = next(iter(state_dict.values())).dtype + + # import pdb; pdb.set_trace() + state_dict = transform_state_dict_to_peft(state_dict, lora_config, adapter_name) + + unexpected_keys = load_model_dict_into_meta(unet, state_dict, device=device, dtype=dtype) + incompatible_keys = None + + if len(unexpected_keys) == 0: + # At this point all LoRA layars has been loaded so we init back an empty state_dict + state_dict = {} + else: + incompatible_keys = set_peft_model_state_dict(unet, state_dict, adapter_name) if incompatible_keys is not None: # check only for unexpected keys @@ -1648,12 +1666,27 @@ def load_lora_into_text_encoder( if adapter_name is None: adapter_name = get_adapter_name(text_encoder) - # inject LoRA layers and load the state dict - text_encoder.load_adapter( - adapter_name=adapter_name, - adapter_state_dict=text_encoder_lora_state_dict, - peft_config=lora_config, - ) + ctx = init_empty_weights if low_cpu_mem_usage else nullcontext + with ctx(): + text_encoder.load_adapter( + adapter_name=adapter_name, + adapter_state_dict=text_encoder_lora_state_dict, + peft_config=lora_config, + ) + + if low_cpu_mem_usage: + device = next(iter(text_encoder_lora_state_dict.values())).device + dtype = next(iter(text_encoder_lora_state_dict.values())).dtype + + # import pdb; pdb.set_trace() + text_encoder_lora_state_dict = transform_state_dict_to_peft( + text_encoder_lora_state_dict, lora_config, adapter_name + ) + + unexpected_keys = load_model_dict_into_meta( + text_encoder, text_encoder_lora_state_dict, device=device, dtype=dtype + ) + # scale LoRA layers with `lora_scale` scale_lora_layers(text_encoder, weight=lora_scale) diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index ada3eea302da..54982b46fef3 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -93,6 +93,7 @@ scale_lora_layers, set_adapter_layers, set_weights_and_activate_adapters, + transform_state_dict_to_peft, unscale_lora_layers, ) from .pil_utils import PIL_INTERPOLATION, make_image_grid, numpy_to_pil, pt_to_pil diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py 
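# A self-contained sketch of the low_cpu_mem_usage idea this patch experiments with
# (and which the next commit reverts): parameters are first created on PyTorch's
# `meta` device so they take no RAM, then real storage is materialized and filled in.
# Assumes `accelerate` is installed; the layer and tensor below are toy stand-ins.
import torch
from accelerate import init_empty_weights

with init_empty_weights():
    layer = torch.nn.Linear(1024, 1024)  # parameters live on the `meta` device
layer = layer.to_empty(device="cpu")     # allocate real, uninitialized storage
with torch.no_grad():
    layer.weight.copy_(torch.zeros(1024, 1024))  # then load the actual tensors in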
index 5e56548ce7b9..8d5f40dd073e 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -208,3 +208,40 @@ def check_peft_version(min_version: str) -> None: f"The version of PEFT you are using is not compatible, please use a version that is greater" f" than {min_version}" ) + + +def transform_state_dict_to_peft(state_dict, config, adapter_name): + """ + Transformers the raw state dict to a peft format that expects a prefix for the adapter layers. + + Args: + state_dict (`dict`): + The raw state dict of the model. + config (`PeftConfig`): + The peft config used to create the adapter weights + adapter_name (`str`): + The name of the adapter to be used. + """ + from peft import PeftType + + if config.peft_type in (PeftType.LORA, PeftType.LOHA, PeftType.ADALORA, PeftType.IA3): + peft_model_state_dict = {} + parameter_prefix = { + PeftType.IA3: "ia3_", + PeftType.LORA: "lora_", + PeftType.ADALORA: "lora_", + PeftType.LOHA: "hada_", + }[config.peft_type] + for k, v in state_dict.items(): + if parameter_prefix in k: + suffix = k.split(parameter_prefix)[1] + if "." in suffix: + suffix_to_replace = ".".join(suffix.split(".")[1:]) + k = k.replace(suffix_to_replace, f"{adapter_name}.{suffix_to_replace}") + else: + k = f"{k}.{adapter_name}" + peft_model_state_dict[k] = v + else: + peft_model_state_dict[k] = v + + return peft_model_state_dict From ebb16cad656466f50824f8a3eaa77ea5a269acf0 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 4 Oct 2023 09:32:59 +0000 Subject: [PATCH 027/134] Revert "v1 faster loading - in progress" This reverts commit ac925f81321e95fc8168184c3346bf3d75404d5a. --- src/diffusers/loaders.py | 49 +++++-------------------------- src/diffusers/utils/__init__.py | 1 - src/diffusers/utils/peft_utils.py | 37 ----------------------- 3 files changed, 8 insertions(+), 79 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index f165c07102a5..dc8d89b68435 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -47,7 +47,6 @@ scale_lora_layers, set_adapter_layers, set_weights_and_activate_adapters, - transform_state_dict_to_peft, ) from .utils.import_utils import BACKENDS_MAPPING @@ -1524,26 +1523,9 @@ def load_lora_into_unet( target_modules=target_modules, ) - ctx = nullcontext if not low_cpu_mem_usage else init_empty_weights + inject_adapter_in_model(lora_config, unet, adapter_name=adapter_name) - with ctx(): - inject_adapter_in_model(lora_config, unet, adapter_name=adapter_name) - - if low_cpu_mem_usage: - device = next(iter(state_dict.values())).device - dtype = next(iter(state_dict.values())).dtype - - # import pdb; pdb.set_trace() - state_dict = transform_state_dict_to_peft(state_dict, lora_config, adapter_name) - - unexpected_keys = load_model_dict_into_meta(unet, state_dict, device=device, dtype=dtype) - incompatible_keys = None - - if len(unexpected_keys) == 0: - # At this point all LoRA layars has been loaded so we init back an empty state_dict - state_dict = {} - else: - incompatible_keys = set_peft_model_state_dict(unet, state_dict, adapter_name) + incompatible_keys = set_peft_model_state_dict(unet, state_dict, adapter_name) if incompatible_keys is not None: # check only for unexpected keys @@ -1666,27 +1648,12 @@ def load_lora_into_text_encoder( if adapter_name is None: adapter_name = get_adapter_name(text_encoder) - ctx = init_empty_weights if low_cpu_mem_usage else nullcontext - with ctx(): - text_encoder.load_adapter( - adapter_name=adapter_name, - 
adapter_state_dict=text_encoder_lora_state_dict, - peft_config=lora_config, - ) - - if low_cpu_mem_usage: - device = next(iter(text_encoder_lora_state_dict.values())).device - dtype = next(iter(text_encoder_lora_state_dict.values())).dtype - - # import pdb; pdb.set_trace() - text_encoder_lora_state_dict = transform_state_dict_to_peft( - text_encoder_lora_state_dict, lora_config, adapter_name - ) - - unexpected_keys = load_model_dict_into_meta( - text_encoder, text_encoder_lora_state_dict, device=device, dtype=dtype - ) - + # inject LoRA layers and load the state dict + text_encoder.load_adapter( + adapter_name=adapter_name, + adapter_state_dict=text_encoder_lora_state_dict, + peft_config=lora_config, + ) # scale LoRA layers with `lora_scale` scale_lora_layers(text_encoder, weight=lora_scale) diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 54982b46fef3..ada3eea302da 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -93,7 +93,6 @@ scale_lora_layers, set_adapter_layers, set_weights_and_activate_adapters, - transform_state_dict_to_peft, unscale_lora_layers, ) from .pil_utils import PIL_INTERPOLATION, make_image_grid, numpy_to_pil, pt_to_pil diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 8d5f40dd073e..5e56548ce7b9 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -208,40 +208,3 @@ def check_peft_version(min_version: str) -> None: f"The version of PEFT you are using is not compatible, please use a version that is greater" f" than {min_version}" ) - - -def transform_state_dict_to_peft(state_dict, config, adapter_name): - """ - Transformers the raw state dict to a peft format that expects a prefix for the adapter layers. - - Args: - state_dict (`dict`): - The raw state dict of the model. - config (`PeftConfig`): - The peft config used to create the adapter weights - adapter_name (`str`): - The name of the adapter to be used. - """ - from peft import PeftType - - if config.peft_type in (PeftType.LORA, PeftType.LOHA, PeftType.ADALORA, PeftType.IA3): - peft_model_state_dict = {} - parameter_prefix = { - PeftType.IA3: "ia3_", - PeftType.LORA: "lora_", - PeftType.ADALORA: "lora_", - PeftType.LOHA: "hada_", - }[config.peft_type] - for k, v in state_dict.items(): - if parameter_prefix in k: - suffix = k.split(parameter_prefix)[1] - if "." 
in suffix: - suffix_to_replace = ".".join(suffix.split(".")[1:]) - k = k.replace(suffix_to_replace, f"{adapter_name}.{suffix_to_replace}") - else: - k = f"{k}.{adapter_name}" - peft_model_state_dict[k] = v - else: - peft_model_state_dict[k] = v - - return peft_model_state_dict From 81f886e571a57f26093e051bb4c8c74e75ac4e6e Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 4 Oct 2023 09:41:09 +0000 Subject: [PATCH 028/134] kohya same generation --- src/diffusers/loaders.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index dc8d89b68435..183e9131d908 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1514,8 +1514,15 @@ def load_lora_into_unet( if not all(rank == current_rank for rank in ranks): raise ValueError("Multi-rank not supported yet") - # TODO: support multi-alpha - alpha = current_rank + if network_alphas is not None: + alphas = set(network_alphas.values()) + if len(alphas) == 1: + alpha = alphas.pop() + # TODO: support multi-alpha + else: + raise ValueError("Multi-alpha not supported yet") + else: + alpha = current_rank lora_config = LoraConfig( r=current_rank, From 7376deb0c7757e677009662b7e834d95cfc6b8de Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 4 Oct 2023 10:08:53 +0000 Subject: [PATCH 029/134] fix some slow tests --- src/diffusers/loaders.py | 9 ++++++--- src/diffusers/utils/peft_utils.py | 1 - 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 183e9131d908..caa6975706c6 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -2126,9 +2126,12 @@ def unload_lora_weights(self): >>> ... ``` """ - for _, module in self.unet.named_modules(): - if hasattr(module, "set_lora_layer"): - module.set_lora_layer(None) + if not self.use_peft_backend: + for _, module in self.unet.named_modules(): + if hasattr(module, "set_lora_layer"): + module.set_lora_layer(None) + else: + recurse_remove_peft_layers(self.unet) # Safe to call the following regardless of LoRA. self._remove_text_encoder_monkey_patch() diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 5e56548ce7b9..f8884cf8f6b0 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -59,7 +59,6 @@ def recurse_remove_peft_layers(model): module.padding, module.dilation, module.groups, - module.bias, ).to(module.weight.device) new_module.weight = module.weight From ff82de4ddbd68597f1595483d2d59b42fa674c4a Mon Sep 17 00:00:00 2001 From: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> Date: Wed, 4 Oct 2023 17:02:02 +0530 Subject: [PATCH 030/134] peft integration features for unet lora 1. Support for Multiple ranks/alphas 2. Support for Multiple active adapters 3. Support for enabling/disabling LoRAs --- src/diffusers/loaders.py | 170 +++++++++++++++++++++++++-------------- 1 file changed, 109 insertions(+), 61 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index f165c07102a5..8ffe5489e9e3 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -680,6 +680,55 @@ def _unfuse_lora_apply(self, module): if hasattr(module, "_unfuse_lora"): module._unfuse_lora() + def set_adapters( + self, + adapter_names: Union[List[str], str], + weights: List[float] = None, + ): + """ + Sets the adapter layers for the unet. + + Args: + adapter_names (`List[str]` or `str`): + The names of the adapters to use. 
+ weights (`List[float]`, *optional*): + The weights to use for the unet. If `None`, the weights are set to `1.0` for all the adapters. + """ + if not self.use_peft_backend: + raise ValueError("PEFT backend is required for this method.") + + def process_weights(adapter_names, weights): + if weights is None: + weights = [1.0] * len(adapter_names) + elif isinstance(weights, float): + weights = [weights] + + if len(adapter_names) != len(weights): + raise ValueError( + f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(weights)}" + ) + return weights + + adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names + weights = process_weights(adapter_names, weights) + set_weights_and_activate_adapters(self, adapter_names, weights) + + def disable_lora(self): + """ + Disables the LoRA layers for the unet. + """ + if not self.use_peft_backend: + raise ValueError("PEFT backend is required for this method.") + set_adapter_layers(self, enabled=False) + + def enable_lora(self): + """ + Enables the LoRA layers for the unet. + """ + if not self.use_peft_backend: + raise ValueError("PEFT backend is required for this method.") + set_adapter_layers(self, enabled=True) + def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs): cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) @@ -1449,7 +1498,7 @@ def _maybe_map_sgm_blocks_to_diffusers(cls, state_dict, unet_config, delimiter=" @classmethod def load_lora_into_unet( - cls, state_dict, network_alphas, unet, low_cpu_mem_usage=None, _pipeline=None, adapter_name="default" + cls, state_dict, network_alphas, unet, low_cpu_mem_usage=None, _pipeline=None, adapter_name=None ): """ This will load the LoRA layers specified in `state_dict` into `unet`. @@ -1469,7 +1518,8 @@ def load_lora_into_unet( Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this argument to `True` will raise an error. adapter_name (`str`, *optional*): - The name of the adapter to load the weights into. By default we use `"default"` + Adapter name to be used for referencing the loaded adapter model. If not specified, it will use + `default_{i}` where i is the total number of adapters being loaded. 
""" low_cpu_mem_usage = low_cpu_mem_usage if low_cpu_mem_usage is not None else _LOW_CPU_MEM_USAGE_DEFAULT # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), @@ -1501,49 +1551,20 @@ def load_lora_into_unet( state_dict = convert_unet_state_dict_to_peft(state_dict) - target_modules = [] - ranks = [] + rank = {} for key in state_dict.keys(): - # filter out the name - filtered_name = ".".join(key.split(".")[:-2]) - target_modules.append(filtered_name) if "lora_B" in key: - rank = state_dict[key].shape[1] - ranks.append(rank) - - current_rank = ranks[0] - if not all(rank == current_rank for rank in ranks): - raise ValueError("Multi-rank not supported yet") - - # TODO: support multi-alpha - alpha = current_rank - - lora_config = LoraConfig( - r=current_rank, - lora_alpha=alpha, - target_modules=target_modules, - ) + rank[key] = state_dict[key].shape[1] - ctx = nullcontext if not low_cpu_mem_usage else init_empty_weights + lora_config_kwargs = get_peft_kwargs(rank, network_alphas, state_dict) + lora_config = LoraConfig(**lora_config_kwargs) - with ctx(): - inject_adapter_in_model(lora_config, unet, adapter_name=adapter_name) - - if low_cpu_mem_usage: - device = next(iter(state_dict.values())).device - dtype = next(iter(state_dict.values())).dtype - - # import pdb; pdb.set_trace() - state_dict = transform_state_dict_to_peft(state_dict, lora_config, adapter_name) + # adapter_name + if adapter_name is None: + adapter_name = get_adapter_name(unet) - unexpected_keys = load_model_dict_into_meta(unet, state_dict, device=device, dtype=dtype) - incompatible_keys = None - - if len(unexpected_keys) == 0: - # At this point all LoRA layars has been loaded so we init back an empty state_dict - state_dict = {} - else: - incompatible_keys = set_peft_model_state_dict(unet, state_dict, adapter_name) + inject_adapter_in_model(lora_config, unet, adapter_name=adapter_name) + incompatible_keys = set_peft_model_state_dict(unet, state_dict, adapter_name) if incompatible_keys is not None: # check only for unexpected keys @@ -1666,26 +1687,11 @@ def load_lora_into_text_encoder( if adapter_name is None: adapter_name = get_adapter_name(text_encoder) - ctx = init_empty_weights if low_cpu_mem_usage else nullcontext - with ctx(): - text_encoder.load_adapter( - adapter_name=adapter_name, - adapter_state_dict=text_encoder_lora_state_dict, - peft_config=lora_config, - ) - - if low_cpu_mem_usage: - device = next(iter(text_encoder_lora_state_dict.values())).device - dtype = next(iter(text_encoder_lora_state_dict.values())).dtype - - # import pdb; pdb.set_trace() - text_encoder_lora_state_dict = transform_state_dict_to_peft( - text_encoder_lora_state_dict, lora_config, adapter_name - ) - - unexpected_keys = load_model_dict_into_meta( - text_encoder, text_encoder_lora_state_dict, device=device, dtype=dtype - ) + text_encoder.load_adapter( + adapter_name=adapter_name, + adapter_state_dict=text_encoder_lora_state_dict, + peft_config=lora_config, + ) # scale LoRA layers with `lora_scale` scale_lora_layers(text_encoder, weight=lora_scale) @@ -2274,7 +2280,7 @@ def unfuse_text_encoder_lora(text_encoder): self.num_fused_loras -= 1 - def set_adapter_for_text_encoder( + def set_adapters_for_text_encoder( self, adapter_names: Union[List[str], str], text_encoder: Optional[PreTrainedModel] = None, @@ -2349,6 +2355,48 @@ def enable_lora_for_text_encoder(self, text_encoder: Optional[PreTrainedModel] = raise ValueError("Text Encoder not found.") set_adapter_layers(self.text_encoder, 
enabled=True) + def set_adapters( + self, + adapter_names: Union[List[str], str], + unet_weights: List[float] = None, + te_weights: List[float] = None, + te2_weights: List[float] = None, + ): + # Handle the UNET + self.unet.set_adapters(adapter_names, unet_weights) + + # Handle the Text Encoder + if hasattr(self, "text_encoder"): + self.set_adapters_for_text_encoder(adapter_names, self.text_encoder, te_weights) + if hasattr(self, "text_encoder_2"): + self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2, te2_weights) + + def disable_lora(self): + if not self.use_peft_backend: + raise ValueError("PEFT backend is required for this method.") + + # Disable unet adapters + self.unet.disable_lora() + + # Disable text encoder adapters + if hasattr(self, "text_encoder"): + self.disable_lora_for_text_encoder(self.text_encoder) + if hasattr(self, "text_encoder_2"): + self.disable_lora_for_text_encoder(self.text_encoder_2) + + def enable_lora(self): + if not self.use_peft_backend: + raise ValueError("PEFT backend is required for this method.") + + # Enable unet adapters + self.unet.enable_lora() + + # Enable text encoder adapters + if hasattr(self, "text_encoder"): + self.enable_lora_for_text_encoder(self.text_encoder) + if hasattr(self, "text_encoder_2"): + self.enable_lora_for_text_encoder(self.text_encoder_2) + class FromSingleFileMixin: """ From 94403c1fd746747e0d8aed3e8a8e4e713038eff7 Mon Sep 17 00:00:00 2001 From: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> Date: Wed, 4 Oct 2023 17:46:20 +0530 Subject: [PATCH 031/134] fix `get_peft_kwargs` --- src/diffusers/utils/peft_utils.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 8d5f40dd073e..41c6b43954b1 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -124,13 +124,16 @@ def get_peft_kwargs(rank_dict, network_alpha_dict, peft_state_dict): rank_pattern = dict(filter(lambda x: x[1] != r, rank_dict.items())) rank_pattern = {k.split(".lora_B.")[0]: v for k, v in rank_pattern.items()} - if network_alpha_dict is not None and len(set(network_alpha_dict.values())) > 1: - # get the alpha occurring the most number of times - lora_alpha = collections.Counter(network_alpha_dict.values()).most_common()[0][0] - - # for modules with alpha different from the most occurring alpha, add it to the `alpha_pattern` - alpha_pattern = dict(filter(lambda x: x[1] != lora_alpha, network_alpha_dict.items())) - alpha_pattern = {".".join(k.split(".down.")[0].split(".")[:-1]): v for k, v in alpha_pattern.items()} + if network_alpha_dict is not None: + if len(set(network_alpha_dict.values())) > 1: + # get the alpha occurring the most number of times + lora_alpha = collections.Counter(network_alpha_dict.values()).most_common()[0][0] + + # for modules with alpha different from the most occurring alpha, add it to the `alpha_pattern` + alpha_pattern = dict(filter(lambda x: x[1] != lora_alpha, network_alpha_dict.items())) + alpha_pattern = {".".join(k.split(".down.")[0].split(".")[:-1]): v for k, v in alpha_pattern.items()} + else: + lora_alpha = set(network_alpha_dict.values()).pop() # layer names without the Diffusers specific target_modules = list({name.split(".lora")[0] for name in peft_state_dict.keys()})
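# The alpha handling fixed above, reduced to a toy example (the key format is
# simplified here; real keys carry the full module path):
import collections

alphas = {"layer1.alpha": 8, "layer2.alpha": 8, "layer3.alpha": 16}
lora_alpha = collections.Counter(alphas.values()).most_common()[0][0]  # -> 8
alpha_pattern = {k: v for k, v in alphas.items() if v != lora_alpha}   # -> {"layer3.alpha": 16}
# and when every alpha is identical, the new `else` branch simply does:
same_alpha = set({"layer1.alpha": 8, "layer2.alpha": 8}.values()).pop()  # -> 8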
032/134] Update loaders.py --- src/diffusers/loaders.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 8ffe5489e9e3..9bd5db404273 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -47,7 +47,6 @@ scale_lora_layers, set_adapter_layers, set_weights_and_activate_adapters, - transform_state_dict_to_peft, ) from .utils.import_utils import BACKENDS_MAPPING From 4f21a7bef278ca7617260de53a5ead6691650f31 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 4 Oct 2023 14:05:30 +0000 Subject: [PATCH 033/134] add some tests --- tests/lora/test_lora_layers_peft.py | 87 +++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 18b0199ebc93..6f10b5f10b21 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -554,6 +554,93 @@ def test_simple_inference_with_text_unet_lora_and_scale(self): ) + def test_simple_inference_with_text_lora_unet_fused(self): + """ + Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model + and makes sure it works as expected - with unet + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) + + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + pipe.fuse_lora() + # Fusing should still keep the LoRA layers + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in unet") + + if self.has_two_text_encoders: + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + output_fused = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertFalse( + np.allclose(output_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output" + ) + + + def test_simple_inference_with_text_unet_lora_unloaded(self): + """ + Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights + and makes sure it works as expected + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) + + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + 
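These tests repeatedly call a `check_if_lora_correctly_set` helper that is defined elsewhere in the suite. A plausible minimal implementation, assuming PEFT's `BaseTunerLayer` marks every injected LoRA module (the import path below is the one the loaders code itself uses):

```python
from peft.tuners.tuners_utils import BaseTunerLayer


def check_if_lora_correctly_set(model) -> bool:
    # A model "has LoRA set" if at least one submodule is a PEFT tuner layer.
    return any(isinstance(module, BaseTunerLayer) for module in model.modules())
```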
self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + pipe.unload_lora_weights() + # unloading should remove the LoRA layers + self.assertFalse( + self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder" + ) + self.assertFalse( + self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly unloaded in Unet" + ) + + if self.has_two_text_encoders: + self.assertFalse( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly unloaded in text encoder 2" + ) + + output_unloaded = pipe(**inputs, generator=torch.manual_seed(0)).images + self.assertTrue( + np.allclose(output_unloaded, output_no_lora, atol=1e-3, rtol=1e-3), "Unloading lora should restore the original output" + ) + + class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): pipeline_class = StableDiffusionPipeline scheduler_cls = DDIMScheduler From 3568e7f763d5a3ff53cb3e4358ac0214b6672f8c Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 4 Oct 2023 14:18:18 +0000 Subject: [PATCH 034/134] add unfuse tests --- src/diffusers/loaders.py | 10 +++++- tests/lora/test_lora_layers_peft.py | 48 +++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index caa6975706c6..e44b501a5fb5 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -2216,7 +2216,15 @@ def unfuse_lora(self, unfuse_unet: bool = True, unfuse_text_encoder: bool = True LoRA parameters then it won't have any effect. 
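The `unfuse_lora` change introduced by this patch walks the model and unmerges PEFT tuner layers instead of calling the legacy `_unfuse_lora` hooks. A condensed sketch of the merge/unmerge round trip (the free-standing function names are illustrative; `scale_layer`, `merge`, and `unmerge` are the PEFT calls the diff itself makes):

```python
from peft.tuners.tuners_utils import BaseTunerLayer


def fuse_lora_weights(model, lora_scale: float = 1.0):
    for module in model.modules():
        if isinstance(module, BaseTunerLayer):
            if lora_scale != 1.0:
                module.scale_layer(lora_scale)
            module.merge()  # folds the scaled LoRA delta into the base weight


def unfuse_lora_weights(model):
    for module in model.modules():
        if isinstance(module, BaseTunerLayer):
            module.unmerge()  # restores the original base weight
```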
""" if unfuse_unet: - self.unet.unfuse_lora() + if not self.use_peft_backend: + self.unet.unfuse_lora() + else: + from peft.tuners.tuners_utils import BaseTunerLayer + + for module in self.unet.modules(): + if isinstance(module, BaseTunerLayer): + module.unmerge() + if self.use_peft_backend: from peft.tuners.tuners_utils import BaseTunerLayer diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 6f10b5f10b21..fc76f5f3b731 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -641,6 +641,54 @@ def test_simple_inference_with_text_unet_lora_unloaded(self): ) + def test_simple_inference_with_text_unet_lora_unloaded(self): + """ + Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights + and makes sure it works as expected + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + pipe.text_encoder.add_adapter(text_lora_config) + pipe.unet.add_adapter(unet_lora_config) + + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + pipe.fuse_lora() + + output_fused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + + pipe.unfuse_lora() + + output_unfused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + # unloading should remove the LoRA layers + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers" + ) + self.assertTrue( + self.check_if_lora_correctly_set(pipe.unet), "Unfuse should still keep LoRA layers" + ) + + if self.has_two_text_encoders: + self.assertFalse( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly unloaded in text encoder 2" + ) + + self.assertTrue( + np.allclose(output_fused_lora, output_unfused_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output" + ) + + class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): pipeline_class = StableDiffusionPipeline scheduler_cls = DDIMScheduler From 459285f3c19620aed8a4db6e174d9324b1038cc1 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 4 Oct 2023 14:19:06 +0000 Subject: [PATCH 035/134] fix tests --- tests/lora/test_lora_layers_peft.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index fc76f5f3b731..b10c1be896b4 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -680,10 +680,11 @@ def test_simple_inference_with_text_unet_lora_unloaded(self): ) if self.has_two_text_encoders: - self.assertFalse( - self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly unloaded in text encoder 2" + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Unfuse should still keep LoRA layers" ) + # Fuse and unfuse should lead to the same results self.assertTrue( np.allclose(output_fused_lora, output_unfused_lora, atol=1e-3, rtol=1e-3), "Fused lora 
should change the output" ) From 24dad332c84a969a6c0d541e8554d40b3e003426 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 4 Oct 2023 14:19:23 +0000 Subject: [PATCH 036/134] up --- tests/lora/test_lora_layers_peft.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index b10c1be896b4..2cc942c06bae 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -641,7 +641,7 @@ def test_simple_inference_with_text_unet_lora_unloaded(self): ) - def test_simple_inference_with_text_unet_lora_unloaded(self): + def test_simple_inference_with_text_unet_lora_unfused(self): """ Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights and makes sure it works as expected From ec04337d6efebaba0f3ebd3f0b6d7ef0e6eb9064 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 4 Oct 2023 14:35:43 +0000 Subject: [PATCH 037/134] add set adapter from sourab and tests --- src/diffusers/loaders.py | 49 +++++++++++++++++++++++++ tests/lora/test_lora_layers_peft.py | 57 +++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index e44b501a5fb5..d2891b0c4642 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -2334,6 +2334,55 @@ def enable_lora_for_text_encoder(self, text_encoder: Optional[PreTrainedModel] = raise ValueError("Text Encoder not found.") set_adapter_layers(self.text_encoder, enabled=True) + + def set_adapters( + self, + adapter_names: Union[List[str], str], + weights: List[float] = None, + ): + """ + Sets the adapter layers for the unet. + Args: + adapter_names (`List[str]` or `str`): + The names of the adapters to use. + weights (`List[float]`, *optional*): + The weights to use for the unet. If `None`, the weights are set to `1.0` for all the adapters. + """ + if not self.use_peft_backend: + raise ValueError("PEFT backend is required for this method.") + + def process_weights(adapter_names, weights): + if weights is None: + weights = [1.0] * len(adapter_names) + elif isinstance(weights, float): + weights = [weights] + + if len(adapter_names) != len(weights): + raise ValueError( + f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(weights)}" + ) + return weights + + adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names + weights = process_weights(adapter_names, weights) + set_weights_and_activate_adapters(self, adapter_names, weights) + + def disable_lora(self): + """ + Disables the LoRA layers for the unet. + """ + if not self.use_peft_backend: + raise ValueError("PEFT backend is required for this method.") + set_adapter_layers(self, enabled=False) + + def enable_lora(self): + """ + Enables the LoRA layers for the unet. 
+ """ + if not self.use_peft_backend: + raise ValueError("PEFT backend is required for this method.") + set_adapter_layers(self, enabled=True) + class FromSingleFileMixin: """ diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 2cc942c06bae..bab406ab992e 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -690,6 +690,63 @@ def test_simple_inference_with_text_unet_lora_unfused(self): ) + def test_simple_inference_with_text_unet_multi_adapter(self): + """ + Tests a simple inference with lora attached to text encoder and unet, attaches + multiple adapters and set them + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + + + pipe.unet.add_adapter(unet_lora_config, "adapter-1") + pipe.unet.add_adapter(unet_lora_config, "adapter-2") + + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + + # TODO: should we design an API at the pipeline level? + pipe.text_encoder.set_adapter("adapter-1") + pipe.unet.set_adapter("adapter-1") + + output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images + + pipe.text_encoder.set_adapter("adapter-2") + pipe.unet.set_adapter("adapter-2") + output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images + + + pipe.text_encoder.set_adapter(["adapter-1", "adapter-2"]) + pipe.unet.set_adapter(["adapter-1", "adapter-2"]) + + output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images + + # Fuse and unfuse should lead to the same results + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), "Adapter 1 and 2 should give different results" + ) + + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), "Adapter 1 and mixed adapters should give different results" + ) + + self.assertFalse( + np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), "Adapter 2 and mixed adapters should give different results" + ) + class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): pipeline_class = StableDiffusionPipeline scheduler_cls = DDIMScheduler From b40592adf4cbd704132bf167f17060c4762a7439 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 4 Oct 2023 15:06:37 +0000 Subject: [PATCH 038/134] fix multi adapter tests --- src/diffusers/loaders.py | 16 +++++++++++++--- tests/lora/test_lora_layers_peft.py | 20 ++++++++++++++------ 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index d2891b0c4642..fb65e27c6371 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -2365,7 +2365,10 @@ def process_weights(adapter_names, weights): adapter_names = [adapter_names] if 
isinstance(adapter_names, str) else adapter_names weights = process_weights(adapter_names, weights) - set_weights_and_activate_adapters(self, adapter_names, weights) + + for key, value in self.components.items(): + if isinstance(value, nn.Module): + set_weights_and_activate_adapters(value, adapter_names, weights) def disable_lora(self): """ @@ -2373,7 +2376,11 @@ def disable_lora(self): """ if not self.use_peft_backend: raise ValueError("PEFT backend is required for this method.") - set_adapter_layers(self, enabled=False) + + for key, value in self.components.items(): + if isinstance(value, nn.Module): + set_adapter_layers(value, enabled=False) + def enable_lora(self): """ @@ -2381,7 +2388,10 @@ def enable_lora(self): """ if not self.use_peft_backend: raise ValueError("PEFT backend is required for this method.") - set_adapter_layers(self, enabled=True) + + for key, value in self.components.items(): + if isinstance(value, nn.Module): + set_adapter_layers(value, enabled=True) class FromSingleFileMixin: diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index bab406ab992e..80aa934cb5af 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -701,6 +701,8 @@ def test_simple_inference_with_text_unet_multi_adapter(self): pipe.set_progress_bar_config(disable=None) _, _, inputs = self.get_dummy_inputs(with_generator=False) + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") @@ -719,18 +721,15 @@ def test_simple_inference_with_text_unet_multi_adapter(self): ) # TODO: should we design an API at the pipeline level? - pipe.text_encoder.set_adapter("adapter-1") - pipe.unet.set_adapter("adapter-1") + pipe.set_adapters("adapter-1") output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images - pipe.text_encoder.set_adapter("adapter-2") - pipe.unet.set_adapter("adapter-2") + pipe.set_adapters("adapter-2") output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images - pipe.text_encoder.set_adapter(["adapter-1", "adapter-2"]) - pipe.unet.set_adapter(["adapter-1", "adapter-2"]) + pipe.set_adapters(["adapter-1", "adapter-2"]) output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images @@ -747,6 +746,15 @@ def test_simple_inference_with_text_unet_multi_adapter(self): np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), "Adapter 2 and mixed adapters should give different results" ) + pipe.disable_lora() + + output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images + + self.assertTrue( + np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), "output with no lora and output with lora disabled should give same results" + ) + + class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): pipeline_class = StableDiffusionPipeline scheduler_cls = DDIMScheduler From 2646f3dcf09f6921c3522a15b42c52a831b11126 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 4 Oct 2023 15:11:38 +0000 Subject: [PATCH 039/134] style & quality --- src/diffusers/loaders.py | 6 ++--- tests/lora/test_lora_layers_peft.py | 41 ++++++++++++----------------- 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index fb65e27c6371..3b56f924a365 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -2225,7 +2225,6 @@ def unfuse_lora(self, 
unfuse_unet: bool = True, unfuse_text_encoder: bool = True if isinstance(module, BaseTunerLayer): module.unmerge() - if self.use_peft_backend: from peft.tuners.tuners_utils import BaseTunerLayer @@ -2334,7 +2333,6 @@ def enable_lora_for_text_encoder(self, text_encoder: Optional[PreTrainedModel] = raise ValueError("Text Encoder not found.") set_adapter_layers(self.text_encoder, enabled=True) - def set_adapters( self, adapter_names: Union[List[str], str], @@ -2342,6 +2340,7 @@ def set_adapters( ): """ Sets the adapter layers for the unet. + Args: adapter_names (`List[str]` or `str`): The names of the adapters to use. @@ -2376,11 +2375,10 @@ def disable_lora(self): """ if not self.use_peft_backend: raise ValueError("PEFT backend is required for this method.") - + for key, value in self.components.items(): if isinstance(value, nn.Module): set_adapter_layers(value, enabled=False) - def enable_lora(self): """ diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 80aa934cb5af..63116eca80c5 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -553,7 +553,6 @@ def test_simple_inference_with_text_unet_lora_and_scale(self): "The scaling parameter has not been correctly restored!", ) - def test_simple_inference_with_text_lora_unet_fused(self): """ Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model @@ -595,7 +594,6 @@ def test_simple_inference_with_text_lora_unet_fused(self): np.allclose(output_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output" ) - def test_simple_inference_with_text_unet_lora_unloaded(self): """ Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights @@ -626,9 +624,7 @@ def test_simple_inference_with_text_unet_lora_unloaded(self): self.assertFalse( self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder" ) - self.assertFalse( - self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly unloaded in Unet" - ) + self.assertFalse(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly unloaded in Unet") if self.has_two_text_encoders: self.assertFalse( @@ -640,7 +636,6 @@ def test_simple_inference_with_text_unet_lora_unloaded(self): ) - def test_simple_inference_with_text_unet_lora_unfused(self): """ Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights and makes sure it works as expected @@ -672,12 +667,8 @@ def test_simple_inference_with_text_unet_lora_unfused(self): output_unfused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images # unloading should remove the LoRA layers - self.assertTrue( - self.check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers" - ) - self.assertTrue( - self.check_if_lora_correctly_set(pipe.unet), "Unfuse should still keep LoRA layers" - ) + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Unfuse should still keep LoRA layers") if self.has_two_text_encoders: self.assertTrue( @@ -686,14 +677,14 @@ def test_simple_inference_with_text_unet_lora_unfused(self): # Fuse and unfuse should lead to the same results self.assertTrue( - np.allclose(output_fused_lora, output_unfused_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the 
output" + np.allclose(output_fused_lora, output_unfused_lora, atol=1e-3, rtol=1e-3), + "Fused lora should change the output", ) - def test_simple_inference_with_text_unet_multi_adapter(self): """ Tests a simple inference with lora attached to text encoder and unet, attaches - multiple adapters and set them + multiple adapters and set them """ components, _, text_lora_config, unet_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) @@ -701,12 +692,11 @@ def test_simple_inference_with_text_unet_multi_adapter(self): pipe.set_progress_bar_config(disable=None) _, _, inputs = self.get_dummy_inputs(with_generator=False) - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") - pipe.unet.add_adapter(unet_lora_config, "adapter-1") pipe.unet.add_adapter(unet_lora_config, "adapter-2") @@ -728,30 +718,33 @@ def test_simple_inference_with_text_unet_multi_adapter(self): pipe.set_adapters("adapter-2") output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images - pipe.set_adapters(["adapter-1", "adapter-2"]) - output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images + output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images # Fuse and unfuse should lead to the same results self.assertFalse( - np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), "Adapter 1 and 2 should give different results" + np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), + "Adapter 1 and 2 should give different results", ) self.assertFalse( - np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), "Adapter 1 and mixed adapters should give different results" + np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 1 and mixed adapters should give different results", ) self.assertFalse( - np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), "Adapter 2 and mixed adapters should give different results" + np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 2 and mixed adapters should give different results", ) pipe.disable_lora() - output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images + output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images self.assertTrue( - np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), "output with no lora and output with lora disabled should give same results" + np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), + "output with no lora and output with lora disabled should give same results", ) From 02e73a4927c332e6e1759152f8cca59997ac5a12 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 4 Oct 2023 15:53:03 +0000 Subject: [PATCH 040/134] style --- src/diffusers/loaders.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 79f27d0dde68..055f55c44b4b 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1686,14 +1686,13 @@ def load_lora_into_text_encoder( if adapter_name is None: adapter_name = get_adapter_name(text_encoder) - # inject LoRA layers and load the state dict text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, peft_config=lora_config, ) - + # scale LoRA 
layers with `lora_scale` scale_lora_layers(text_encoder, weight=lora_scale) From 86c7d698c8dcb39ee5902b07ed55579af1a4bd28 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 4 Oct 2023 15:53:44 +0000 Subject: [PATCH 041/134] remove comment --- tests/lora/test_lora_layers_peft.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 63116eca80c5..6695dc53cfd8 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -710,7 +710,6 @@ def test_simple_inference_with_text_unet_multi_adapter(self): self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" ) - # TODO: should we design an API at the pipeline level? pipe.set_adapters("adapter-1") output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images From 94abbc0341b9b60fda516977adafa26721204aeb Mon Sep 17 00:00:00 2001 From: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> Date: Thu, 5 Oct 2023 12:12:03 +0530 Subject: [PATCH 042/134] fix `adapter_name` issues --- src/diffusers/loaders.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 055f55c44b4b..1cff0720a6f1 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1187,6 +1187,7 @@ def load_lora_weights( network_alphas=network_alphas, unet=self.unet, low_cpu_mem_usage=low_cpu_mem_usage, + adapter_name=adapter_name, _pipeline=self, ) self.load_lora_into_text_encoder( @@ -1497,7 +1498,7 @@ def _maybe_map_sgm_blocks_to_diffusers(cls, state_dict, unet_config, delimiter=" @classmethod def load_lora_into_unet( - cls, state_dict, network_alphas, unet, low_cpu_mem_usage=None, _pipeline=None, adapter_name=None + cls, state_dict, network_alphas, unet, low_cpu_mem_usage=None, adapter_name=None, _pipeline=None ): """ This will load the LoRA layers specified in `state_dict` into `unet`. @@ -2987,7 +2988,9 @@ class StableDiffusionXLLoraLoaderMixin(LoraLoaderMixin): """This class overrides `LoraLoaderMixin` with LoRA loading/saving code that's specific to SDXL""" # Overrride to properly handle the loading and unloading of the additional text encoder. - def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs): + def load_lora_weights( + self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], adapter_name=None, **kwargs + ): """ Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.unet` and `self.text_encoder`. @@ -3005,6 +3008,9 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di Parameters: pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): See [`~loaders.LoraLoaderMixin.lora_state_dict`]. + adapter_name (`str`, *optional*): + Adapter name to be used for referencing the loaded adapter model. If not specified, it will use + `default_{i}` where i is the total number of adapters being loaded. kwargs (`dict`, *optional*): See [`~loaders.LoraLoaderMixin.lora_state_dict`]. 
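With `adapter_name` now threaded through `load_lora_weights` and the text-encoder loader, user code can hold several named adapters and switch between them. A usage sketch, borrowing the repositories exercised by the integration test added later in this series (the API shape shown is the one these patches define, not necessarily the final public one):

```python
import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Two named adapters, loaded side by side.
pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")

# Activate a single adapter...
pipe.set_adapters("pixel")
image = pipe("pixel art, a hacker with a hoodie", num_inference_steps=30).images[0]

# ...or blend both, with per-adapter weights for the UNet.
pipe.set_adapters(["pixel", "toy"], unet_weights=[0.5, 1.0])
image = pipe("toy_face of a hacker with a hoodie", num_inference_steps=30).images[0]
```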
""" @@ -3031,6 +3037,7 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di text_encoder=self.text_encoder, prefix="text_encoder", lora_scale=self.lora_scale, + adapter_name=adapter_name, _pipeline=self, ) @@ -3042,6 +3049,7 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di text_encoder=self.text_encoder_2, prefix="text_encoder_2", lora_scale=self.lora_scale, + adapter_name=adapter_name, _pipeline=self, ) From 61e316c07418a361e1e24344431a836875ee3155 Mon Sep 17 00:00:00 2001 From: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> Date: Thu, 5 Oct 2023 12:20:53 +0530 Subject: [PATCH 043/134] fix unet adapter name for sdxl --- src/diffusers/loaders.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 1cff0720a6f1..ea423aac8144 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -3028,7 +3028,9 @@ def load_lora_weights( if not is_correct_format: raise ValueError("Invalid LoRA checkpoint.") - self.load_lora_into_unet(state_dict, network_alphas=network_alphas, unet=self.unet, _pipeline=self) + self.load_lora_into_unet( + state_dict, network_alphas=network_alphas, unet=self.unet, adapter_name=adapter_name, _pipeline=self + ) text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k} if len(text_encoder_state_dict) > 0: self.load_lora_into_text_encoder( From 32dd0d5bfdcadb847d4b89b70036a88c082751d3 Mon Sep 17 00:00:00 2001 From: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> Date: Thu, 5 Oct 2023 12:43:29 +0530 Subject: [PATCH 044/134] fix enabling/disabling adapters --- src/diffusers/utils/peft_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 8df5bdf675b9..035e7b1d5cf6 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -163,9 +163,9 @@ def set_adapter_layers(model, enabled=True): if isinstance(module, BaseTunerLayer): # The recent version of PEFT needs to call `enable_adapters` instead if hasattr(module, "enable_adapters"): - module.enable_adapters(enabled=False) + module.enable_adapters(enabled=enabled) else: - module.disable_adapters = True + module.disable_adapters = not enabled def set_weights_and_activate_adapters(model, adapter_names, weights): From ba6c180cfd357eedfa7960ea9c253ff4db1b4ddd Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 5 Oct 2023 07:43:23 +0000 Subject: [PATCH 045/134] fix fuse / unfuse unet --- src/diffusers/loaders.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 055f55c44b4b..bcbce9e5bf04 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -669,15 +669,33 @@ def fuse_lora(self, lora_scale=1.0): self.apply(self._fuse_lora_apply) def _fuse_lora_apply(self, module): - if hasattr(module, "_fuse_lora"): - module._fuse_lora(self.lora_scale) + if not self.use_peft_backend: + if hasattr(module, "_fuse_lora"): + module._fuse_lora(self.lora_scale) + else: + from peft.tuners.tuners_utils import BaseTunerLayer + + if isinstance(module, BaseTunerLayer): + if self.lora_scale != 1.0: + module.scale_layer(self.lora_scale) + + module.merge() def unfuse_lora(self): self.apply(self._unfuse_lora_apply) def _unfuse_lora_apply(self, module): - if hasattr(module, "_unfuse_lora"): - 
module._unfuse_lora() + if not self.use_peft_backend: + if hasattr(module, "_unfuse_lora"): + module._unfuse_lora() + else: + from peft.tuners.tuners_utils import BaseTunerLayer + + if isinstance(module, BaseTunerLayer): + if self.lora_scale != 1.0: + module.unscale_layer() + + module.unmerge() def set_adapters( self, From c0d9d6851ace6a2032415dd623a5c7b34a00a2f7 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 5 Oct 2023 07:57:18 +0000 Subject: [PATCH 046/134] nit --- src/diffusers/loaders.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 7527e4e718ea..3daee3d508ae 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -692,9 +692,6 @@ def _unfuse_lora_apply(self, module): from peft.tuners.tuners_utils import BaseTunerLayer if isinstance(module, BaseTunerLayer): - if self.lora_scale != 1.0: - module.unscale_layer() - module.unmerge() def set_adapters( From 7e1e252f11c99e196e0e083afaf032cf70f050ab Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 5 Oct 2023 08:19:27 +0000 Subject: [PATCH 047/134] fix --- src/diffusers/loaders.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 3daee3d508ae..814c266e7e26 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1793,7 +1793,8 @@ def _remove_text_encoder_monkey_patch(self): if hasattr(self, "text_encoder"): remove_method(self.text_encoder) - if self.use_peft_backend: + # In case text encoder have no Lora attached + if self.use_peft_backend and getattr(self.text_encoder, "peft_config", None) is not None: del self.text_encoder.peft_config self.text_encoder._hf_peft_config_loaded = None if hasattr(self, "text_encoder_2"): From f4a52292107c0cc9d2aa1e6e20a018918a48e0e3 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 5 Oct 2023 08:20:19 +0000 Subject: [PATCH 048/134] up --- src/diffusers/loaders.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 814c266e7e26..f27b377db95a 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -3137,13 +3137,14 @@ def _remove_text_encoder_monkey_patch(self): if self.use_peft_backend: recurse_remove_peft_layers(self.text_encoder) # TODO: @younesbelkada handle this in transformers side - del self.text_encoder.peft_config - self.text_encoder._hf_peft_config_loaded = None + if getattr(self.text_encoder, "peft_config", None) is not None: + del self.text_encoder.peft_config + self.text_encoder._hf_peft_config_loaded = None recurse_remove_peft_layers(self.text_encoder_2) - - del self.text_encoder_2.peft_config - self.text_encoder_2._hf_peft_config_loaded = None + if getattr(self.text_encoder_2, "peft_config", None) is not None: + del self.text_encoder_2.peft_config + self.text_encoder_2._hf_peft_config_loaded = None else: self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder) self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder_2) From 8dc6b871eb0b29df3dcb617707bd209925fc30f6 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 5 Oct 2023 09:08:14 +0000 Subject: [PATCH 049/134] fix cpu offloading --- src/diffusers/loaders.py | 102 +++++++++++++++++++++++++++++---------- 1 file changed, 76 insertions(+), 26 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index f27b377db95a..04e84775fbca 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -521,33 
+521,40 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict # Now we remove any existing hooks to is_model_cpu_offload = False is_sequential_cpu_offload = False - if _pipeline is not None: - for _, component in _pipeline.components.items(): - if isinstance(component, nn.Module): - if hasattr(component, "_hf_hook"): - is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) - is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) - logger.info( - "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again." - ) - remove_hook_from_module(component, recurse=is_sequential_cpu_offload) - # only custom diffusion needs to set attn processors - if is_custom_diffusion: - self.set_attn_processor(attn_processors) - - # set lora layers - for target_module, lora_layer in lora_layers_list: - target_module.set_lora_layer(lora_layer) - - self.to(dtype=self.dtype, device=self.device) - - # Offload back. - if is_model_cpu_offload: - _pipeline.enable_model_cpu_offload() - elif is_sequential_cpu_offload: - _pipeline.enable_sequential_cpu_offload() - # Unsafe code /> + # For the PEFT backend the UNet is already offloaded at this stage as it is handled inside `load_lora_into_unet` + if not self.use_peft_backend: + if _pipeline is not None: + for _, component in _pipeline.components.items(): + if isinstance(component, nn.Module): + if hasattr(component, "_hf_hook"): + is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) + is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) + + logger.info( + "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again." + ) + remove_hook_from_module(component, recurse=is_sequential_cpu_offload) + + # only custom diffusion needs to set attn processors + if is_custom_diffusion: + self.set_attn_processor(attn_processors) + + # set lora layers + for target_module, lora_layer in lora_layers_list: + target_module.set_lora_layer(lora_layer) + + self.to(dtype=self.dtype, device=self.device) + + # Offload back. + if is_model_cpu_offload: + _pipeline.enable_model_cpu_offload() + elif is_sequential_cpu_offload: + _pipeline.enable_sequential_cpu_offload() + # Unsafe code /> def convert_state_dict_legacy_attn_format(self, state_dict, network_alphas): is_new_lora_format = all( @@ -1511,6 +1518,38 @@ def _maybe_map_sgm_blocks_to_diffusers(cls, state_dict, unet_config, delimiter=" return new_state_dict + @classmethod + def _optionally_disable_offloading(cls, _pipeline): + """ + Optionally removes offloading in case the pipeline has already been sequentially offloaded to CPU. + + Args: + _pipeline (`Pipeline`): + The pipeline to disable offloading for. + + Returns: + tuple: + A tuple indicating if `is_model_cpu_offload` or `is_sequential_cpu_offload` is True. 
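The `_optionally_disable_offloading` helper being added here centralizes the hook bookkeeping that `load_attn_procs` used to do inline. Condensed to its essentials (a standalone restatement for illustration, not the exact method body), the logic tells the two accelerate offloading flavors apart by the hook class:

```python
from accelerate.hooks import AlignDevicesHook, CpuOffload, remove_hook_from_module
from torch import nn


def optionally_disable_offloading(pipeline):
    """Detect and temporarily remove accelerate offloading hooks (illustrative)."""
    is_model_cpu_offload = False
    is_sequential_cpu_offload = False
    if pipeline is not None:
        for _, component in pipeline.components.items():
            if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
                # accelerate tags offloaded modules with `_hf_hook`; the hook class
                # distinguishes model-level offload from sequential offload.
                is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
                is_sequential_cpu_offload = isinstance(component._hf_hook, AlignDevicesHook)
                remove_hook_from_module(component, recurse=is_sequential_cpu_offload)
    return is_model_cpu_offload, is_sequential_cpu_offload
```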
+ """ + is_model_cpu_offload = False + is_sequential_cpu_offload = False + + if _pipeline is not None: + for _, component in _pipeline.components.items(): + if isinstance(component, nn.Module): + if hasattr(component, "_hf_hook"): + is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) + is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) + + # There is no need to remove the hooks as they have been already attached in case LoRA + # if not self.use_peft_backend: + logger.info( + "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again." + ) + remove_hook_from_module(component, recurse=is_sequential_cpu_offload) + + return (is_model_cpu_offload, is_sequential_cpu_offload) + @classmethod def load_lora_into_unet( cls, state_dict, network_alphas, unet, low_cpu_mem_usage=None, adapter_name=None, _pipeline=None @@ -1578,6 +1617,10 @@ def load_lora_into_unet( if adapter_name is None: adapter_name = get_adapter_name(unet) + # In case the pipeline has been already offloaded to CPU - temporarly remove the hooks + # otherwise loading LoRA weights will lead to an error + is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) + inject_adapter_in_model(lora_config, unet, adapter_name=adapter_name) incompatible_keys = set_peft_model_state_dict(unet, state_dict, adapter_name) @@ -1592,6 +1635,13 @@ def load_lora_into_unet( # At this point all LoRA layars has been loaded so we init back an empty state_dict state_dict = {} + # Offload back. + if is_model_cpu_offload: + _pipeline.enable_model_cpu_offload() + elif is_sequential_cpu_offload: + _pipeline.enable_sequential_cpu_offload() + # Unsafe code /> + unet.load_attn_procs( state_dict, network_alphas=network_alphas, low_cpu_mem_usage=low_cpu_mem_usage, _pipeline=_pipeline ) From 041304958e9dd24b319a84e33780bad5ce4d4a08 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 5 Oct 2023 12:19:21 +0000 Subject: [PATCH 050/134] fix another slow test --- tests/lora/test_lora_layers_old_backend.py | 26 +++++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index ae90f8b6a4b8..9f2f95c23f97 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -2256,21 +2256,34 @@ def test_sdxl_1_0_last_ben(self): self.assertTrue(np.allclose(images, expected, atol=1e-3)) def test_sdxl_1_0_fuse_unfuse_all(self): - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16) + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16) text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict()) text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict()) unet_sd = copy.deepcopy(pipe.unet.state_dict()) pipe.load_lora_weights( - "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.float16 + "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.bfloat16 ) + + fused_te_state_dict = pipe.text_encoder.state_dict() + fused_te_2_state_dict = pipe.text_encoder_2.state_dict() + unet_state_dict = pipe.unet.state_dict() + + for key, value in text_encoder_1_sd.items(): + 
self.assertTrue(torch.allclose(fused_te_state_dict[key], value)) + + for key, value in text_encoder_2_sd.items(): + self.assertTrue(torch.allclose(fused_te_2_state_dict[key], value)) + + for key, value in unet_sd.items(): + self.assertTrue(torch.allclose(unet_state_dict[key], value)) + pipe.fuse_lora() pipe.unload_lora_weights() - pipe.unfuse_lora() - assert state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict()) - assert state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict()) - assert state_dicts_almost_equal(unet_sd, pipe.unet.state_dict()) + assert not state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict()) + assert not state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict()) + assert not state_dicts_almost_equal(unet_sd, pipe.unet.state_dict()) def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self): generator = torch.Generator().manual_seed(0) @@ -2279,6 +2292,7 @@ def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self): pipe.enable_sequential_cpu_offload() lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) images = pipe( From 6fe1b2dc3299ce24c39aa988d92e73c2108f46c6 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 5 Oct 2023 12:43:52 +0000 Subject: [PATCH 051/134] fix another offload test --- src/diffusers/loaders.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index cf943692d7a9..bd8828f2e305 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1752,6 +1752,8 @@ def load_lora_into_text_encoder( if adapter_name is None: adapter_name = get_adapter_name(text_encoder) + is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) + # inject LoRA layers and load the state dict text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, peft_config=lora_config, ) # scale LoRA layers with `lora_scale` scale_lora_layers(text_encoder, weight=lora_scale) - - is_model_cpu_offload = False - is_sequential_cpu_offload = False else: cls._modify_text_encoder( text_encoder, From 206f0de27643fa984d380dfe48a08f3a0a2f7d13 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 5 Oct 2023 12:56:09 +0000 Subject: [PATCH 052/134] add more tests --- tests/lora/test_lora_layers_peft.py | 59 +++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 6695dc53cfd8..24b90ec58241 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -834,6 +834,65 @@ def test_integration_logits_no_scale(self): self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + @slow + @require_torch_gpu + def test_integration_logits_multi_adapter(self): + path = "stabilityai/stable-diffusion-xl-base-1.0" + lora_id = "CiroN2022/toy-face" + + pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16) + pipe.load_lora_weights(lora_id, weight_name="toy_face_sdxl.safetensors", adapter_name="toy") + pipe = pipe.to("cuda") + + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.unet), + "Lora not correctly set in Unet", + ) + + prompt = "toy_face of a hacker with a hoodie" + + lora_scale = 0.9 + + images = pipe( + prompt=prompt, + num_inference_steps=30, + 
generator=torch.manual_seed(0), + cross_attention_kwargs={"scale": lora_scale}, + output_type="np" + ).images + expected_slice_scale = np.array([0.538, 0.539, 0.540, 0.540, 0.542, 0.539, 0.538, 0.541, 0.539]) + + predicted_slice = images[0, -3:, -3:, -1].flatten() + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel") + pipe.set_adapters("pixel") + + prompt = "pixel art, a hacker with a hoodie, simple, flat colors" + images = pipe(prompt, num_inference_steps=30, guidance_scale=7.5, cross_attention_kwargs={"scale": lora_scale}, generator=torch.manual_seed(0), output_type="np").images + + predicted_slice = images[0, -3:, -3:, -1].flatten() + expected_slice_scale = np.array([0.61973065, 0.62018543, 0.62181497, 0.61933696, 0.6208608, 0.620576, 0.6200281 , 0.62258327, 0.6259889]) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + + # multi-adapter inference + pipe.set_adapters(["pixel","toy"], unet_weights=[0.5,1.0]) + images = pipe(prompt, num_inference_steps=30, guidance_scale=7.5, cross_attention_kwargs={"scale": 1.0}, generator=torch.manual_seed(0), output_type="np").images + predicted_slice = images[0, -3:, -3:, -1].flatten() + expected_slice_scale = np.array([0.5977, 0.5985, 0.6039, 0.5976, 0.6025, 0.6036, 0.5946, 0.5979, 0.5998]) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + # Lora disabled + pipe.disable_lora() + images = pipe(prompt, num_inference_steps=30, guidance_scale=7.5, cross_attention_kwargs={"scale": lora_scale}, generator=torch.manual_seed(0), output_type="np").images + predicted_slice = images[0, -3:, -3:, -1].flatten() + expected_slice_scale = np.array([0.54625, 0.5473, 0.5495, 0.5465, 0.5476, 0.5461, 0.5452, 0.5485 , 0.5493]) + self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) + + class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): has_two_text_encoders = True pipeline_class = StableDiffusionXLPipeline From 2265fc2e27fa37e6ea480a769eaa6997a82e71f2 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 5 Oct 2023 15:20:57 +0000 Subject: [PATCH 053/134] all slow tests pass --- src/diffusers/loaders.py | 5 +++++ src/diffusers/utils/peft_utils.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index bd8828f2e305..b7b5b4f850c4 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1605,11 +1605,16 @@ def load_lora_into_unet( state_dict = convert_unet_state_dict_to_peft(state_dict) + if network_alphas is not None: + network_alphas = convert_unet_state_dict_to_peft(network_alphas) + rank = {} for key in state_dict.keys(): if "lora_B" in key: rank[key] = state_dict[key].shape[1] + + lora_config_kwargs = get_peft_kwargs(rank, network_alphas, state_dict) lora_config = LoraConfig(**lora_config_kwargs) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 035e7b1d5cf6..a60c479643ad 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -130,7 +130,7 @@ def get_peft_kwargs(rank_dict, network_alpha_dict, peft_state_dict): # for modules with alpha different from the most occuring alpha, add it to the `alpha_pattern` alpha_pattern = dict(filter(lambda x: x[1] != lora_alpha, 
network_alpha_dict.items())) - alpha_pattern = {".".join(k.split(".down.")[0].split(".")[:-1]): v for k, v in alpha_pattern.items()} + alpha_pattern = {".".join(k.split(".lora_A.")[0].split(".")).replace(".alpha", ""): v for k, v in alpha_pattern.items()} else: lora_alpha = set(network_alpha_dict.values()).pop() From 265a92806449cbc7da47de2037a8a3364b93ef3c Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 5 Oct 2023 15:24:11 +0000 Subject: [PATCH 054/134] style --- src/diffusers/loaders.py | 2 - src/diffusers/utils/peft_utils.py | 4 +- tests/lora/test_lora_layers_old_backend.py | 4 +- tests/lora/test_lora_layers_peft.py | 48 +++++++++++++++------- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index b7b5b4f850c4..0f9ca2d424e1 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1613,8 +1613,6 @@ def load_lora_into_unet( if "lora_B" in key: rank[key] = state_dict[key].shape[1] - - lora_config_kwargs = get_peft_kwargs(rank, network_alphas, state_dict) lora_config = LoraConfig(**lora_config_kwargs) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index a60c479643ad..53006cd54c9d 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -130,7 +130,9 @@ def get_peft_kwargs(rank_dict, network_alpha_dict, peft_state_dict): # for modules with alpha different from the most occuring alpha, add it to the `alpha_pattern` alpha_pattern = dict(filter(lambda x: x[1] != lora_alpha, network_alpha_dict.items())) - alpha_pattern = {".".join(k.split(".lora_A.")[0].split(".")).replace(".alpha", ""): v for k, v in alpha_pattern.items()} + alpha_pattern = { + ".".join(k.split(".lora_A.")[0].split(".")).replace(".alpha", ""): v for k, v in alpha_pattern.items() + } else: lora_alpha = set(network_alpha_dict.values()).pop() diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 7bcd2333150f..cf1d3496ac4a 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -2256,7 +2256,9 @@ def test_sdxl_1_0_last_ben(self): self.assertTrue(np.allclose(images, expected, atol=1e-3)) def test_sdxl_1_0_fuse_unfuse_all(self): - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16) + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 + ) text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict()) text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict()) unet_sd = copy.deepcopy(pipe.unet.state_dict()) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 24b90ec58241..0868af8362f5 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -833,7 +833,6 @@ def test_integration_logits_no_scale(self): self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - @slow @require_torch_gpu def test_integration_logits_multi_adapter(self): @@ -844,7 +843,6 @@ def test_integration_logits_multi_adapter(self): pipe.load_lora_weights(lora_id, weight_name="toy_face_sdxl.safetensors", adapter_name="toy") pipe = pipe.to("cuda") - self.assertTrue( self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet", @@ -855,11 +853,11 @@ def test_integration_logits_multi_adapter(self): lora_scale = 0.9 images = pipe( - prompt=prompt, - 
num_inference_steps=30, - generator=torch.manual_seed(0), + prompt=prompt, + num_inference_steps=30, + generator=torch.manual_seed(0), cross_attention_kwargs={"scale": lora_scale}, - output_type="np" + output_type="np", ).images expected_slice_scale = np.array([0.538, 0.539, 0.540, 0.540, 0.542, 0.539, 0.538, 0.541, 0.539]) @@ -871,25 +869,47 @@ def test_integration_logits_multi_adapter(self): pipe.set_adapters("pixel") prompt = "pixel art, a hacker with a hoodie, simple, flat colors" - images = pipe(prompt, num_inference_steps=30, guidance_scale=7.5, cross_attention_kwargs={"scale": lora_scale}, generator=torch.manual_seed(0), output_type="np").images - + images = pipe( + prompt, + num_inference_steps=30, + guidance_scale=7.5, + cross_attention_kwargs={"scale": lora_scale}, + generator=torch.manual_seed(0), + output_type="np", + ).images + predicted_slice = images[0, -3:, -3:, -1].flatten() - expected_slice_scale = np.array([0.61973065, 0.62018543, 0.62181497, 0.61933696, 0.6208608, 0.620576, 0.6200281 , 0.62258327, 0.6259889]) + expected_slice_scale = np.array( + [0.61973065, 0.62018543, 0.62181497, 0.61933696, 0.6208608, 0.620576, 0.6200281, 0.62258327, 0.6259889] + ) self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) - # multi-adapter inference - pipe.set_adapters(["pixel","toy"], unet_weights=[0.5,1.0]) - images = pipe(prompt, num_inference_steps=30, guidance_scale=7.5, cross_attention_kwargs={"scale": 1.0}, generator=torch.manual_seed(0), output_type="np").images + pipe.set_adapters(["pixel", "toy"], unet_weights=[0.5, 1.0]) + images = pipe( + prompt, + num_inference_steps=30, + guidance_scale=7.5, + cross_attention_kwargs={"scale": 1.0}, + generator=torch.manual_seed(0), + output_type="np", + ).images predicted_slice = images[0, -3:, -3:, -1].flatten() expected_slice_scale = np.array([0.5977, 0.5985, 0.6039, 0.5976, 0.6025, 0.6036, 0.5946, 0.5979, 0.5998]) self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) # Lora disabled pipe.disable_lora() - images = pipe(prompt, num_inference_steps=30, guidance_scale=7.5, cross_attention_kwargs={"scale": lora_scale}, generator=torch.manual_seed(0), output_type="np").images + images = pipe( + prompt, + num_inference_steps=30, + guidance_scale=7.5, + cross_attention_kwargs={"scale": lora_scale}, + generator=torch.manual_seed(0), + output_type="np", + ).images predicted_slice = images[0, -3:, -3:, -1].flatten() - expected_slice_scale = np.array([0.54625, 0.5473, 0.5495, 0.5465, 0.5476, 0.5461, 0.5452, 0.5485 , 0.5493]) + expected_slice_scale = np.array([0.54625, 0.5473, 0.5495, 0.5465, 0.5476, 0.5461, 0.5452, 0.5485, 0.5493]) self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) From e7a3dc6eee9b6f070cc0a3f3d1eb8ee2040b247b Mon Sep 17 00:00:00 2001 From: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> Date: Fri, 6 Oct 2023 09:59:24 +0530 Subject: [PATCH 055/134] fix alpha pattern for unet and text encoder --- src/diffusers/loaders.py | 6 ++++-- src/diffusers/utils/peft_utils.py | 13 +++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 0f9ca2d424e1..53fa2b2f771d 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1613,7 +1613,7 @@ def load_lora_into_unet( if "lora_B" in key: rank[key] = state_dict[key].shape[1] - lora_config_kwargs = get_peft_kwargs(rank, network_alphas, state_dict) + lora_config_kwargs = 
get_peft_kwargs(rank, network_alphas, state_dict, is_unet=True) lora_config = LoraConfig(**lora_config_kwargs) # adapter_name @@ -1747,7 +1747,9 @@ def load_lora_into_text_encoder( if cls.use_peft_backend: from peft import LoraConfig - lora_config_kwargs = get_peft_kwargs(rank, network_alphas, text_encoder_lora_state_dict) + lora_config_kwargs = get_peft_kwargs( + rank, network_alphas, text_encoder_lora_state_dict, is_unet=False + ) lora_config = LoraConfig(**lora_config_kwargs) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 53006cd54c9d..3d3cdcc19fc0 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -111,10 +111,11 @@ def unscale_lora_layers(model): module.unscale_layer() -def get_peft_kwargs(rank_dict, network_alpha_dict, peft_state_dict): +def get_peft_kwargs(rank_dict, network_alpha_dict, peft_state_dict, is_unet=True): rank_pattern = {} alpha_pattern = {} r = lora_alpha = list(rank_dict.values())[0] + if len(set(rank_dict.values())) > 1: # get the rank occuring the most number of times r = collections.Counter(rank_dict.values()).most_common()[0][0] @@ -130,9 +131,13 @@ def get_peft_kwargs(rank_dict, network_alpha_dict, peft_state_dict): # for modules with alpha different from the most occuring alpha, add it to the `alpha_pattern` alpha_pattern = dict(filter(lambda x: x[1] != lora_alpha, network_alpha_dict.items())) - alpha_pattern = { - ".".join(k.split(".lora_A.")[0].split(".")).replace(".alpha", ""): v for k, v in alpha_pattern.items() - } + if is_unet: + alpha_pattern = { + ".".join(k.split(".lora_A.")[0].split(".")).replace(".alpha", ""): v + for k, v in alpha_pattern.items() + } + else: + alpha_pattern = {".".join(k.split(".down.")[0].split(".")[:-1]): v for k, v in alpha_pattern.items()} else: lora_alpha = set(network_alpha_dict.values()).pop() From abb23259590aee62cf587dc74390012b355298d2 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Fri, 6 Oct 2023 10:54:55 +0200 Subject: [PATCH 056/134] Update src/diffusers/loaders.py Co-authored-by: Benjamin Bossan --- src/diffusers/loaders.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 53fa2b2f771d..349e7baac788 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1629,12 +1629,13 @@ def load_lora_into_unet( if incompatible_keys is not None: # check only for unexpected keys - if hasattr(incompatible_keys, "unexpected_keys") and len(incompatible_keys.unexpected_keys) > 0: + unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None) + if unexpected_keys: logger.warning( f"Loading adapter weights from state_dict led to unexpected keys not found in the model: " - f" {incompatible_keys.unexpected_keys}. " + f" {unexpected_keys}. 
" ) - elif hasattr(incompatible_keys, "unexpected_keys") and len(incompatible_keys.unexpected_keys) == 0: + elif unexpected_keys is not None: # At this point all LoRA layars has been loaded so we init back an empty state_dict state_dict = {} From 81db89fd75bb1bda35b1a2c907f670c217d47790 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Fri, 6 Oct 2023 10:55:59 +0200 Subject: [PATCH 057/134] Update src/diffusers/models/attention.py Co-authored-by: Benjamin Bossan --- src/diffusers/models/attention.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py index b45cb6a0717e..2bbeca90ae83 100644 --- a/src/diffusers/models/attention.py +++ b/src/diffusers/models/attention.py @@ -358,11 +358,8 @@ def gelu(self, gate): return F.gelu(gate.to(dtype=torch.float32)).to(dtype=gate.dtype) def forward(self, hidden_states, scale: float = 1.0): - hidden_states, gate = ( - self.proj(hidden_states, scale).chunk(2, dim=-1) - if not USE_PEFT_BACKEND - else self.proj(hidden_states).chunk(2, dim=-1) - ) + args = () if USE_PEFT_BACKEND else (scale,) + hidden_states, gate = self.proj(hidden_states, *args).chunk(2, dim=-1) return hidden_states * self.gelu(gate) From fc643eb6c2e0a10425866f235b0a1997a02f440c Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 6 Oct 2023 09:05:24 +0000 Subject: [PATCH 058/134] up --- src/diffusers/models/attention_processor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py index ddecfc318b89..b7eeb8b03f16 100644 --- a/src/diffusers/models/attention_processor.py +++ b/src/diffusers/models/attention_processor.py @@ -1013,7 +1013,8 @@ def __call__( if attn.group_norm is not None: hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) - query = attn.to_q(hidden_states, scale=scale) if not USE_PEFT_BACKEND else attn.to_q(hidden_states) + args = () if USE_PEFT_BACKEND else (scale,) + query = attn.to_q(hidden_states, *args) if encoder_hidden_states is None: encoder_hidden_states = hidden_states From 957108b7dc730cfe886000c535d12dab528b82c5 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 6 Oct 2023 09:06:32 +0000 Subject: [PATCH 059/134] up --- src/diffusers/models/modeling_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 4a3cbecbd22c..915165cea05d 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -295,7 +295,7 @@ def disable_xformers_memory_efficient_attention(self): """ self.set_use_memory_efficient_attention_xformers(False) - def add_adapter(self, adapter_config, adapter_name: Optional[str] = None) -> None: + def add_adapter(self, adapter_config, adapter_name: str = "default") -> None: r""" If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT official documentation: https://huggingface.co/docs/peft @@ -315,8 +315,6 @@ def add_adapter(self, adapter_config, adapter_name: Optional[str] = None) -> Non from peft import PeftConfig, inject_adapter_in_model - adapter_name = adapter_name or "default" - if not self._hf_peft_config_loaded: self._hf_peft_config_loaded = True elif adapter_name in self.peft_config: From 5d9ce0d8d1c086717b06f05bcb101b261defaa6e Mon Sep 17 00:00:00 2001 From: younesbelkada 
Date: Fri, 6 Oct 2023 09:07:14 +0000
Subject: [PATCH 060/134] clarify comment

---
 src/diffusers/models/modeling_utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py
index 915165cea05d..313b0a23e9ba 100644
--- a/src/diffusers/models/modeling_utils.py
+++ b/src/diffusers/models/modeling_utils.py
@@ -326,8 +326,7 @@ def add_adapter(self, adapter_config, adapter_name: str = "default") -> None:
             )
 
         # Unlike transformers, here we don't need to retrieve the name_or_path of the unet as the loading logic is
-        # handled by the `load_lora_layers` or `LoraLoaderMixin`.
-        # @patrickvonplaten @sayakpaul do we have an equivalent of `model.config.name_or_path` in diffusers?
+        # handled by the `load_lora_layers` or `LoraLoaderMixin`. Therefore we set it to `None` here.
         adapter_config.base_model_name_or_path = None
         inject_adapter_in_model(adapter_config, self, adapter_name)
         self.set_adapter(adapter_name)

From bd44f56be74de1ef91b7ca040f7cd7d08e6f7c0f Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Fri, 6 Oct 2023 09:08:13 +0000
Subject: [PATCH 061/134] comments

---
 src/diffusers/models/modeling_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py
index 313b0a23e9ba..9fae1777c5dd 100644
--- a/src/diffusers/models/modeling_utils.py
+++ b/src/diffusers/models/modeling_utils.py
@@ -297,13 +297,13 @@ def disable_xformers_memory_efficient_attention(self):
 
     def add_adapter(self, adapter_config, adapter_name: str = "default") -> None:
         r"""
-        If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
-        official documentation: https://huggingface.co/docs/peft
-
         Adds a fresh new adapter to the current model for training purpose. If no adapter name is passed, a default
         name is assigned to the adapter to follow the convention of PEFT library (in PEFT we use "default" as the
         default adapter name).
 
+        If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
+        official documentation: https://huggingface.co/docs/peft
+
         Args:
             adapter_config (`~peft.PeftConfig`):
                 The configuration of the adapter to add, supported adapters are non-prefix tuning and adaption prompts

From 71c321e375638b948b250271ec9c9049c7c4ac6d Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Fri, 6 Oct 2023 09:09:02 +0000
Subject: [PATCH 062/134] change comment order

---
 src/diffusers/models/modeling_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py
index 9fae1777c5dd..27e49f6433bf 100644
--- a/src/diffusers/models/modeling_utils.py
+++ b/src/diffusers/models/modeling_utils.py
@@ -333,11 +333,11 @@ def add_adapter(self, adapter_config, adapter_name: str = "default") -> None:
 
     def set_adapter(self, adapter_name: Union[str, List[str]]) -> None:
         """
+        Sets a specific adapter by forcing the model to use that adapter and disable the other adapters.
+
        If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
        official documentation: https://huggingface.co/docs/peft
 
-        Sets a specific adapter by forcing the model to use a that adapter and disable the other adapters.
-
         Args:
             adapter_name (Union[str, List[str]])):
                 The list of adapters to set or the adapter name in case of single adapter.
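The patches above settle the shape of the adapter-management API on diffusers models. For reference, here is a
minimal usage sketch of that API, assuming a diffusers build with the PEFT backend enabled (peft >= 0.6.0 and
transformers >= 4.33.1); the checkpoint id and the LoRA hyperparameters below are illustrative only, not part of
these patches.

import torch
from diffusers import UNet2DConditionModel
from peft import LoraConfig

# Illustrative checkpoint; any UNet2DConditionModel works the same way.
unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="unet", torch_dtype=torch.float32
)

# Attach two independent LoRA adapters to the attention projections.
lora_config = LoraConfig(r=4, lora_alpha=4, target_modules=["to_q", "to_k", "to_v", "to_out.0"])
unet.add_adapter(lora_config, adapter_name="style")
unet.add_adapter(lora_config, adapter_name="subject")

# Route inference through a single adapter, or bypass the adapters entirely.
unet.set_adapter("style")
unet.disable_adapters()  # run with the base weights only
unet.enable_adapters()   # re-enable the currently active adapter(s)

Passing a list of names to set_adapter additionally requires a PEFT version with multi-adapter support, which a
later patch in this series guards against explicitly.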
From c42d974b9e1e06831cae6f80d61752ec44a7c8c9 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Fri, 6 Oct 2023 09:10:48 +0000
Subject: [PATCH 063/134] change comment order

---
 src/diffusers/models/modeling_utils.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py
index 27e49f6433bf..160b4e865203 100644
--- a/src/diffusers/models/modeling_utils.py
+++ b/src/diffusers/models/modeling_utils.py
@@ -383,10 +383,10 @@ def set_adapter(self, adapter_name: Union[str, List[str]]) -> None:
 
     def disable_adapters(self) -> None:
         r"""
+        Disable all adapters that are attached to the model. This leads to inferring with the base model only.
+
         If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
         official documentation: https://huggingface.co/docs/peft
-
-        Disable all adapters that are attached to the model. This leads to inferring with the base model only.
         """
         check_peft_version(min_version=MIN_PEFT_VERSION)
 
@@ -401,10 +401,10 @@ def disable_adapters(self) -> None:
 
     def enable_adapters(self) -> None:
         """
+        Enable adapters that are attached to the model. The model will use `self.active_adapters()`.
+
         If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
         official documentation: https://huggingface.co/docs/peft
-
-        Enable adapters that are attached to the model. The model will use `self.active_adapter()`
         """
         check_peft_version(min_version=MIN_PEFT_VERSION)
 
@@ -419,10 +419,10 @@ def enable_adapters(self) -> None:
 
     def active_adapters(self) -> List[str]:
         """
+        Gets the current list of active adapters of the model.
+
         If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
         official documentation: https://huggingface.co/docs/peft
-
-        Gets the current list of active adapters of the model.
         """
         check_peft_version(min_version=MIN_PEFT_VERSION)
 

From a0598e6b9e167de84468bed4338533dd46c2bfc9 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Fri, 6 Oct 2023 11:55:52 +0000
Subject: [PATCH 064/134] style & quality

---
 src/diffusers/models/modeling_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py
index 160b4e865203..9ad56cb67911 100644
--- a/src/diffusers/models/modeling_utils.py
+++ b/src/diffusers/models/modeling_utils.py
@@ -384,7 +384,7 @@ def set_adapter(self, adapter_name: Union[str, List[str]]) -> None:
     def disable_adapters(self) -> None:
         r"""
         Disable all adapters that are attached to the model. This leads to inferring with the base model only.
-
+
         If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
         official documentation: https://huggingface.co/docs/peft
         """

From a7a6cd619ab441a3792eefa6ba7b05d9722ec752 Mon Sep 17 00:00:00 2001
From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
Date: Fri, 6 Oct 2023 13:57:17 +0200
Subject: [PATCH 065/134] Update tests/lora/test_lora_layers_peft.py

Co-authored-by: Patrick von Platen
---
 tests/lora/test_lora_layers_peft.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 0868af8362f5..9eec6017ba17 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -833,7 +833,7 @@ def test_integration_logits_no_scale(self):
 
         self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
 
-    @slow
+    @nightly
     @require_torch_gpu
     def test_integration_logits_multi_adapter(self):
         path = "stabilityai/stable-diffusion-xl-base-1.0"

From 9992964cd7e98aec6db77f1a29d4fe6e78831367 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Fri, 6 Oct 2023 12:37:23 +0000
Subject: [PATCH 066/134] fix bugs and add tests

---
 src/diffusers/loaders.py            | 45 ++++++++++++++++++++++
 tests/lora/test_lora_layers_peft.py | 59 ++++++++++++++++++++++++++++-
 2 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index 349e7baac788..75311d4851fe 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -2479,6 +2479,51 @@ def enable_lora(self):
         if hasattr(self, "text_encoder_2"):
             self.enable_lora_for_text_encoder(self.text_encoder_2)
 
+    def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None:
+        """
+        Sets the LoRAs that are listed in `adapter_names` to a target device. Useful for offloading LoRAs to the CPU
+        in case one wants to load multiple adapters and free some GPU memory.
+
+        Note this is specific to LoRA and needs some tweaking for new adapter architectures (e.g. IA3, AdaLoRA, ...)
+
+        Args:
+            adapter_names (`List[str]`):
+                List of adapters to send to the device.
+            device (`Union[torch.device, str, int]`):
+                Device to send the adapters to. Can be either a torch device, a str or an integer.
+ """ + if not self.use_peft_backend: + raise ValueError("PEFT backend is required for this method.") + + from peft.tuners.tuners_utils import BaseTunerLayer + + # Handle the UNET + for unet_module in self.unet.modules(): + if isinstance(unet_module, BaseTunerLayer): + # loop over submodules + for name, unet_submodule in unet_module.named_children(): + if name in ["lora_A", "lora_B"]: + for adapter_name in adapter_names: + unet_submodule[adapter_name].to(device) + + # Handle the text encoder + modules_to_process = [] + if hasattr(self, "text_encoder"): + modules_to_process.append(self.text_encoder) + + if hasattr(self, "text_encoder_2"): + modules_to_process.append(self.text_encoder_2) + + for text_encoder in modules_to_process: + # loop over submodules + for text_encoder_module in text_encoder.modules(): + if isinstance(text_encoder_module, BaseTunerLayer): + # loop over submodules + for name, text_encoder_submodule in text_encoder_module.named_children(): + if name in ["lora_A", "lora_B"]: + for adapter_name in adapter_names: + text_encoder_submodule[adapter_name].to(device) + class FromSingleFileMixin: """ diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 9eec6017ba17..d0506919e795 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -36,7 +36,7 @@ LoRAAttnProcessor2_0, ) from diffusers.utils.import_utils import is_peft_available -from diffusers.utils.testing_utils import floats_tensor, require_peft_backend, require_torch_gpu, slow +from diffusers.utils.testing_utils import floats_tensor, nightly, require_peft_backend, require_torch_gpu, slow if is_peft_available(): @@ -777,6 +777,63 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): "latent_channels": 4, } + @slow + @require_torch_gpu + def test_integration_move_lora_cpu(self): + path = "runwayml/stable-diffusion-v1-5" + lora_id = "takuma104/lora-test-text-encoder-lora-target" + + pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) + pipe.load_lora_weights(lora_id, adapter_name="adapter-1") + pipe.load_lora_weights(lora_id, adapter_name="adapter-2") + pipe = pipe.to("cuda") + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder), + "Lora not correctly set in text encoder", + ) + + self.assertTrue( + self.check_if_lora_correctly_set(pipe.unet), + "Lora not correctly set in text encoder", + ) + + pipe.set_lora_device(["adapter-1"], "cpu") + + for name, module in pipe.unet.named_modules(): + if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)): + self.assertTrue(module.weight.device == torch.device("cpu")) + elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)): + # import pdb; pdb.set_trace() + self.assertTrue(module.weight.device != torch.device("cpu")) + + for name, module in pipe.text_encoder.named_modules(): + if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)): + self.assertTrue(module.weight.device == torch.device("cpu")) + elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)): + # import pdb; pdb.set_trace() + self.assertTrue(module.weight.device != torch.device("cpu")) + + pipe.set_lora_device(["adapter-1"], 0) + + for n, m in pipe.unet.named_modules(): + if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)): + self.assertTrue(m.weight.device != torch.device("cpu")) + + for n, m in pipe.text_encoder.named_modules(): + if "adapter-1" in n and not isinstance(m, 
(nn.Dropout, nn.Identity)): + self.assertTrue(m.weight.device != torch.device("cpu")) + + pipe.set_lora_device(["adapter-1", "adapter-2"], "cuda") + + for n, m in pipe.unet.named_modules(): + if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)): + self.assertTrue(m.weight.device != torch.device("cpu")) + + for n, m in pipe.text_encoder.named_modules(): + if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)): + self.assertTrue(m.weight.device != torch.device("cpu")) + @slow @require_torch_gpu def test_integration_logits_with_scale(self): From 525743e50167ffc8e5bf73f5cfff8261063805c7 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Fri, 6 Oct 2023 14:38:05 +0200 Subject: [PATCH 067/134] Update src/diffusers/models/modeling_utils.py Co-authored-by: Benjamin Bossan --- src/diffusers/models/modeling_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 9ad56cb67911..610a2a264fb7 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -397,7 +397,11 @@ def disable_adapters(self) -> None: for _, module in self.named_modules(): if isinstance(module, BaseTunerLayer): - module.disable_adapters = True + if hasattr(module, "enable_adapters"): + module.enable_adapters(enabled=False) + else: + # support for older PEFT versions + module.disable_adapters = True def enable_adapters(self) -> None: """ From 71838633a9446749e6e6810c11b464b9ba8527a6 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Fri, 6 Oct 2023 14:39:03 +0200 Subject: [PATCH 068/134] Update src/diffusers/models/modeling_utils.py Co-authored-by: Benjamin Bossan --- src/diffusers/models/modeling_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 610a2a264fb7..752ec047dd05 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -419,7 +419,11 @@ def enable_adapters(self) -> None: for _, module in self.named_modules(): if isinstance(module, BaseTunerLayer): - module.disable_adapters = False + if hasattr(module, "enable_adapters"): + module.enable_adapters(enabled=True) + else: + # support for older PEFT versions + module.disable_adapters = False def active_adapters(self) -> List[str]: """ From e44c17ce2794604b4df57a57d3f29c33de9dbc20 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 6 Oct 2023 12:43:25 +0000 Subject: [PATCH 069/134] refactor --- src/diffusers/models/modeling_utils.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 752ec047dd05..9bb3d70de231 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -346,18 +346,18 @@ def set_adapter(self, adapter_name: Union[str, List[str]]) -> None: if not self._hf_peft_config_loaded: raise ValueError("No adapter loaded. Please load an adapter first.") - elif isinstance(adapter_name, list): - missing = set(adapter_name) - set(self.peft_config) - if len(missing) > 0: - raise ValueError( - f"Following adapter(s) could not be found: {', '.join(missing)}. Make sure you are passing the correct adapter name(s)." 
- f" current loaded adapters are: {list(self.peft_config.keys())}" - ) - elif adapter_name not in self.peft_config: + + if isinstance(adapter_name, str): + adapter_name = [adapter_name] + + missing = set(adapter_name) - set(self.peft_config) + if len(missing) > 0: raise ValueError( - f"Adapter with name {adapter_name} not found. Please pass the correct adapter name among {list(self.peft_config.keys())}" + f"Following adapter(s) could not be found: {', '.join(missing)}. Make sure you are passing the correct adapter name(s)." + f" current loaded adapters are: {list(self.peft_config.keys())}" ) + from peft.tuners.tuners_utils import BaseTunerLayer _adapters_has_been_set = False From f435ce9d7fc6f47c6ca51cba24a585053a09e915 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 6 Oct 2023 12:48:01 +0000 Subject: [PATCH 070/134] suggestion --- src/diffusers/models/modeling_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 9bb3d70de231..92e594971c45 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -367,7 +367,7 @@ def set_adapter(self, adapter_name: Union[str, List[str]]) -> None: if hasattr(module, "set_adapter"): module.set_adapter(adapter_name) # Previous versions of PEFT does not support multi-adapter inference - elif not hasattr(module, "set_adapter") and isinstance(adapter_name, list): + elif not hasattr(module, "set_adapter") and len(adapter_name) != 1: raise ValueError( "You are trying to set multiple adapters and you have a PEFT version that does not support multi-adapter inference. Please upgrade to the latest version of PEFT." " `pip install -U peft` or `pip install -U git+https://github.com/huggingface/peft.git`" From 7e8cb7a6db13c9f3fb4d8adb4b6abfe993de9694 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 6 Oct 2023 12:57:33 +0000 Subject: [PATCH 071/134] add break statemebt --- src/diffusers/models/modeling_utils.py | 1 - src/diffusers/utils/state_dict_utils.py | 13 +++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 92e594971c45..3cb0cc6d88aa 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -357,7 +357,6 @@ def set_adapter(self, adapter_name: Union[str, List[str]]) -> None: f" current loaded adapters are: {list(self.peft_config.keys())}" ) - from peft.tuners.tuners_utils import BaseTunerLayer _adapters_has_been_set = False diff --git a/src/diffusers/utils/state_dict_utils.py b/src/diffusers/utils/state_dict_utils.py index aada09a6be7f..777c611f7150 100644 --- a/src/diffusers/utils/state_dict_utils.py +++ b/src/diffusers/utils/state_dict_utils.py @@ -41,7 +41,6 @@ class StateDictType(enum.Enum): ".to_v_lora.up": ".to_v.lora_B", ".lora.up": ".lora_B", ".lora.down": ".lora_A", - ".processor.": ".", } @@ -103,6 +102,10 @@ class StateDictType(enum.Enum): StateDictType.PEFT: PEFT_TO_DIFFUSERS, } +KEYS_TO_ALWAYS_REPLACE = { + ".processor.": ".", +} + def convert_state_dict(state_dict, mapping): r""" @@ -122,11 +125,17 @@ def convert_state_dict(state_dict, mapping): """ converted_state_dict = {} for k, v in state_dict.items(): + # First, filter out the keys that we always want to replace + for pattern in KEYS_TO_ALWAYS_REPLACE.keys(): + if pattern in k: + new_pattern = KEYS_TO_ALWAYS_REPLACE[pattern] + k = k.replace(pattern, new_pattern) + for pattern in 
mapping.keys():
             if pattern in k:
                 new_pattern = mapping[pattern]
                 k = k.replace(pattern, new_pattern)
-            # break
+            break
         converted_state_dict[k] = v
 
     return converted_state_dict

From 2af9bfdcfbb3ebbf5305ba6288c4ad382f8167e6 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Fri, 6 Oct 2023 13:34:51 +0000
Subject: [PATCH 072/134] add compile tests

---
 tests/lora/test_lora_layers_peft.py | 34 +++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index d0506919e795..430cd1d391fa 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -746,6 +746,40 @@ def test_simple_inference_with_text_unet_multi_adapter(self):
             "output with no lora and output with lora disabled should give same results",
         )
 
+    @unittest.skip("This is failing for now - need to investigate")
+    def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self):
+        """
+        Tests a simple inference with lora attached to text encoder and unet, then compiles the modules with
+        `torch.compile` and makes sure inference works as expected
+        """
+        components, _, text_lora_config, unet_lora_config = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(self.torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)
+
+        pipe.text_encoder.add_adapter(text_lora_config)
+        pipe.unet.add_adapter(unet_lora_config)
+
+        self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
+        self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
+
+        if self.has_two_text_encoders:
+            pipe.text_encoder_2.add_adapter(text_lora_config)
+            self.assertTrue(
+                self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
+            )
+
+        pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+        pipe.text_encoder = torch.compile(pipe.text_encoder, mode="reduce-overhead", fullgraph=True)
+
+        if self.has_two_text_encoders:
+            pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True)
+
+        # Just makes sure it works..
+ _ = pipe(**inputs, generator=torch.manual_seed(0)).images + + class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): pipeline_class = StableDiffusionPipeline From 8da2350f662cdcba7f8e82bd7c552323ce80a19d Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 6 Oct 2023 16:18:55 +0000 Subject: [PATCH 073/134] move slow tests to peft tests as I modified them --- tests/lora/test_lora_layers_peft.py | 599 +++++++++++++++++++++++++++- 1 file changed, 598 insertions(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 430cd1d391fa..b741a6b3e016 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -35,9 +35,11 @@ LoRAAttnProcessor, LoRAAttnProcessor2_0, ) -from diffusers.utils.import_utils import is_peft_available +from diffusers.utils.import_utils import is_peft_available, is_accelerate_available from diffusers.utils.testing_utils import floats_tensor, nightly, require_peft_backend, require_torch_gpu, slow +if is_accelerate_available(): + from accelerate.utils import release_memory if is_peft_available(): from peft import LoraConfig @@ -1040,3 +1042,598 @@ class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): "latent_channels": 4, "sample_size": 128, } + + +@slow +@require_torch_gpu +class LoraIntegrationTests(unittest.TestCase): + def tearDown(self): + import gc + + gc.collect() + torch.cuda.empty_cache() + gc.collect() + + def test_dreambooth_old_format(self): + generator = torch.Generator("cpu").manual_seed(0) + + lora_model_id = "hf-internal-testing/lora_dreambooth_dog_example" + card = RepoCard.load(lora_model_id) + base_model_id = card.data.to_dict()["base_model"] + + pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) + pipe = pipe.to(torch_device) + pipe.load_lora_weights(lora_model_id) + + images = pipe( + "A photo of a sks dog floating in the river", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + + expected = np.array([0.7207, 0.6787, 0.6010, 0.7478, 0.6838, 0.6064, 0.6984, 0.6443, 0.5785]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_dreambooth_text_encoder_new_format(self): + generator = torch.Generator().manual_seed(0) + + lora_model_id = "hf-internal-testing/lora-trained" + card = RepoCard.load(lora_model_id) + base_model_id = card.data.to_dict()["base_model"] + + pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) + pipe = pipe.to(torch_device) + pipe.load_lora_weights(lora_model_id) + + images = pipe("A photo of a sks dog", output_type="np", generator=generator, num_inference_steps=2).images + + images = images[0, -3:, -3:, -1].flatten() + + expected = np.array([0.6628, 0.6138, 0.5390, 0.6625, 0.6130, 0.5463, 0.6166, 0.5788, 0.5359]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + + def test_a1111(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None).to( + torch_device + ) + lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" + lora_filename = "light_and_shadow.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = 
images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + + def test_lycoris(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/Amixx", safety_checker=None, use_safetensors=True, variant="fp16" + ).to(torch_device) + lora_model_id = "hf-internal-testing/edgLycorisMugler-light" + lora_filename = "edgLycorisMugler-light.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.6463, 0.658, 0.599, 0.6542, 0.6512, 0.6213, 0.658, 0.6485, 0.6017]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + + def test_a1111_with_model_cpu_offload(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None) + pipe.enable_model_cpu_offload() + lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" + lora_filename = "light_and_shadow.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + + def test_a1111_with_sequential_cpu_offload(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None) + pipe.enable_sequential_cpu_offload() + lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" + lora_filename = "light_and_shadow.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + + def test_kohya_sd_v15_with_higher_dimensions(self): + generator = torch.Generator().manual_seed(0) + + pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( + torch_device + ) + lora_model_id = "hf-internal-testing/urushisato-lora" + lora_filename = "urushisato_v15.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.7165, 0.6616, 0.5833, 0.7504, 0.6718, 0.587, 0.6871, 0.6361, 0.5694]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + + def test_vanilla_funetuning(self): + generator = torch.Generator().manual_seed(0) + + lora_model_id = "hf-internal-testing/sd-model-finetuned-lora-t4" + card = RepoCard.load(lora_model_id) + base_model_id = 
card.data.to_dict()["base_model"] + + pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None) + pipe = pipe.to(torch_device) + pipe.load_lora_weights(lora_model_id) + + images = pipe("A pokemon with blue eyes.", output_type="np", generator=generator, num_inference_steps=2).images + + images = images[0, -3:, -3:, -1].flatten() + + expected = np.array([0.7406, 0.699, 0.5963, 0.7493, 0.7045, 0.6096, 0.6886, 0.6388, 0.583]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + + def test_unload_kohya_lora(self): + generator = torch.manual_seed(0) + prompt = "masterpiece, best quality, mountain" + num_inference_steps = 2 + + pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( + torch_device + ) + initial_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + initial_images = initial_images[0, -3:, -3:, -1].flatten() + + lora_model_id = "hf-internal-testing/civitai-colored-icons-lora" + lora_filename = "Colored_Icons_by_vizsumit.safetensors" + + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + generator = torch.manual_seed(0) + lora_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + lora_images = lora_images[0, -3:, -3:, -1].flatten() + + pipe.unload_lora_weights() + generator = torch.manual_seed(0) + unloaded_lora_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten() + + self.assertFalse(np.allclose(initial_images, lora_images)) + self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3)) + release_memory(pipe) + + def test_load_unload_load_kohya_lora(self): + # This test ensures that a Kohya-style LoRA can be safely unloaded and then loaded + # without introducing any side-effects. Even though the test uses a Kohya-style + # LoRA, the underlying adapter handling mechanism is format-agnostic. + generator = torch.manual_seed(0) + prompt = "masterpiece, best quality, mountain" + num_inference_steps = 2 + + pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to( + torch_device + ) + initial_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + initial_images = initial_images[0, -3:, -3:, -1].flatten() + + lora_model_id = "hf-internal-testing/civitai-colored-icons-lora" + lora_filename = "Colored_Icons_by_vizsumit.safetensors" + + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + generator = torch.manual_seed(0) + lora_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + lora_images = lora_images[0, -3:, -3:, -1].flatten() + + pipe.unload_lora_weights() + generator = torch.manual_seed(0) + unloaded_lora_images = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten() + + self.assertFalse(np.allclose(initial_images, lora_images)) + self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3)) + + # make sure we can load a LoRA again after unloading and they don't have + # any undesired effects. 
+ pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + generator = torch.manual_seed(0) + lora_images_again = pipe( + prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps + ).images + lora_images_again = lora_images_again[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(lora_images, lora_images_again, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_0_9_lora_one(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") + lora_model_id = "hf-internal-testing/sdxl-0.9-daiton-lora" + lora_filename = "daiton-xl-lora-test.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3838, 0.3482, 0.3588, 0.3162, 0.319, 0.3369, 0.338, 0.3366, 0.3213]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_0_9_lora_two(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") + lora_model_id = "hf-internal-testing/sdxl-0.9-costumes-lora" + lora_filename = "saijo.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.3137, 0.3269, 0.3355, 0.255, 0.2577, 0.2563, 0.2679, 0.2758, 0.2626]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_0_9_lora_three(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") + lora_model_id = "hf-internal-testing/sdxl-0.9-kamepan-lora" + lora_filename = "kame_sdxl_v2-000020-16rank.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.4015, 0.3761, 0.3616, 0.3745, 0.3462, 0.3337, 0.3564, 0.3649, 0.3468]) + + self.assertTrue(np.allclose(images, expected, atol=5e-3)) + release_memory(pipe) + + def test_sdxl_1_0_lora(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_model_cpu_offload() + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_sdxl_1_0_lora_fusion(self): + generator = torch.Generator().manual_seed(0) + + pipe = 
DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + pipe.fuse_lora() + # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being + # silently deleted - otherwise this will CPU OOM + pipe.unload_lora_weights() + + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + # This way we also test equivalence between LoRA fusion and the non-fusion behaviour. + expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) + + self.assertTrue(np.allclose(images, expected, atol=1e-4)) + release_memory(pipe) + + def test_sdxl_1_0_lora_unfusion(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.fuse_lora() + + pipe.enable_model_cpu_offload() + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + images_with_fusion = images[0, -3:, -3:, -1].flatten() + + pipe.unfuse_lora() + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + images_without_fusion = images[0, -3:, -3:, -1].flatten() + + + self.assertFalse(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_1_0_lora_unfusion_effectivity(self): + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") + pipe.enable_model_cpu_offload() + + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + original_image_slice = images[0, -3:, -3:, -1].flatten() + + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.fuse_lora() + # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being + # silently deleted - otherwise this will CPU OOM + pipe.unload_lora_weights() + + generator = torch.Generator().manual_seed(0) + _ = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + pipe.unfuse_lora() + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + images_without_fusion_slice = images[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(original_image_slice, images_without_fusion_slice, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_1_0_lora_fusion_efficiency(self): + generator = torch.Generator().manual_seed(0) + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" + + pipe = 
DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() + + start_time = time.time() + for _ in range(3): + pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + end_time = time.time() + elapsed_time_non_fusion = end_time - start_time + + del pipe + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.fuse_lora() + # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being + # silently deleted - otherwise this will CPU OOM + pipe.unload_lora_weights() + + pipe.enable_model_cpu_offload() + + start_time = time.time() + generator = torch.Generator().manual_seed(0) + for _ in range(3): + pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + end_time = time.time() + elapsed_time_fusion = end_time - start_time + + self.assertTrue(elapsed_time_fusion < elapsed_time_non_fusion) + release_memory(pipe) + + def test_sdxl_1_0_last_ben(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_model_cpu_offload() + lora_model_id = "TheLastBen/Papercut_SDXL" + lora_filename = "papercut.safetensors" + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe("papercut.safetensors", output_type="np", generator=generator, num_inference_steps=2).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.5244, 0.4347, 0.4312, 0.4246, 0.4398, 0.4409, 0.4884, 0.4938, 0.4094]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_sdxl_1_0_fuse_unfuse_all(self): + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 + ) + text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict()) + text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict()) + unet_sd = copy.deepcopy(pipe.unet.state_dict()) + + pipe.load_lora_weights( + "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.bfloat16 + ) + + fused_te_state_dict = pipe.text_encoder.state_dict() + fused_te_2_state_dict = pipe.text_encoder_2.state_dict() + unet_state_dict = pipe.unet.state_dict() + + for key, value in text_encoder_1_sd.items(): + self.assertTrue(torch.allclose(fused_te_state_dict[key], value)) + + for key, value in text_encoder_2_sd.items(): + self.assertTrue(torch.allclose(fused_te_2_state_dict[key], value)) + + for key, value in unet_state_dict.items(): + self.assertTrue(torch.allclose(unet_state_dict[key], value)) + + pipe.fuse_lora() + pipe.unload_lora_weights() + + assert not state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict()) + assert not state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict()) + assert not state_dicts_almost_equal(unet_sd, pipe.unet.state_dict()) + release_memory(pipe) + + def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self): + generator = torch.Generator().manual_seed(0) + + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe.enable_sequential_cpu_offload() + lora_model_id = "hf-internal-testing/sdxl-1.0-lora" + lora_filename = 
"sd_xl_offset_example-lora_1.0.safetensors" + + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + + images = images[0, -3:, -3:, -1].flatten() + expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) + + self.assertTrue(np.allclose(images, expected, atol=1e-3)) + release_memory(pipe) + + def test_canny_lora(self): + controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0") + + pipe = StableDiffusionXLControlNetPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet + ) + pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors") + pipe.enable_sequential_cpu_offload() + + generator = torch.Generator(device="cpu").manual_seed(0) + prompt = "corgi" + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png" + ) + + images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images + + assert images[0].shape == (768, 512, 3) + + original_image = images[0, -3:, -3:, -1].flatten() + expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333]) + assert np.allclose(original_image, expected_image, atol=1e-04) + release_memory(pipe) + + @nightly + def test_sequential_fuse_unfuse(self): + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + + # 1. round + pipe.load_lora_weights("Pclanglais/TintinIA") + pipe.fuse_lora() + + generator = torch.Generator().manual_seed(0) + images = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + image_slice = images[0, -3:, -3:, -1].flatten() + + pipe.unfuse_lora() + + # 2. round + pipe.load_lora_weights("ProomptEngineer/pe-balloon-diffusion-style") + pipe.fuse_lora() + pipe.unfuse_lora() + + # 3. round + pipe.load_lora_weights("ostris/crayon_style_lora_sdxl") + pipe.fuse_lora() + pipe.unfuse_lora() + + # 4. 
back to 1st round + pipe.load_lora_weights("Pclanglais/TintinIA") + pipe.fuse_lora() + + generator = torch.Generator().manual_seed(0) + images_2 = pipe( + "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 + ).images + image_slice_2 = images_2[0, -3:, -3:, -1].flatten() + + self.assertTrue(np.allclose(image_slice, image_slice_2, atol=1e-3)) + release_memory(pipe) From f497280e008763e36db632ad6cd1ada51ae20c83 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 6 Oct 2023 16:27:22 +0000 Subject: [PATCH 074/134] quality --- src/diffusers/loaders.py | 1 - tests/lora/test_lora_layers_peft.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 75311d4851fe..be2667fcbffa 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -685,7 +685,6 @@ def _fuse_lora_apply(self, module): if isinstance(module, BaseTunerLayer): if self.lora_scale != 1.0: module.scale_layer(self.lora_scale) - module.merge() def unfuse_lora(self): diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 430cd1d391fa..fe39807c0bec 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -776,11 +776,10 @@ def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): if self.has_two_text_encoders: pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True) - # Just makes sure it works.. + # Just makes sure it works.. _ = pipe(**inputs, generator=torch.manual_seed(0)).images - class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): pipeline_class = StableDiffusionPipeline scheduler_cls = DDIMScheduler From 74cfc1cfec9d6885278de6c6d2b38d8418ea48dd Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 6 Oct 2023 16:38:14 +0000 Subject: [PATCH 075/134] refactor a bit --- tests/lora/test_lora_layers_peft.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 662674f9da5f..43675e55256e 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -15,6 +15,8 @@ import os import tempfile import unittest +import time +import copy import numpy as np import torch @@ -25,10 +27,12 @@ from diffusers import ( AutoencoderKL, DDIMScheduler, + DiffusionPipeline, EulerDiscreteScheduler, StableDiffusionPipeline, StableDiffusionXLPipeline, UNet2DConditionModel, + StableDiffusionXLControlNetPipeline, ) from diffusers.loaders import AttnProcsLayers from diffusers.models.attention_processor import ( @@ -36,7 +40,7 @@ LoRAAttnProcessor2_0, ) from diffusers.utils.import_utils import is_peft_available, is_accelerate_available -from diffusers.utils.testing_utils import floats_tensor, nightly, require_peft_backend, require_torch_gpu, slow +from diffusers.utils.testing_utils import floats_tensor, nightly, require_peft_backend, require_torch_gpu, slow, load_image, torch_device if is_accelerate_available(): from accelerate.utils import release_memory @@ -47,6 +51,17 @@ from peft.utils import get_peft_model_state_dict +def state_dicts_almost_equal(sd1, sd2): + sd1 = dict(sorted(sd1.items())) + sd2 = dict(sorted(sd2.items())) + + models_are_equal = True + for ten1, ten2 in zip(sd1.values(), sd2.values()): + if (ten1 - ten2).abs().max() > 1e-3: + models_are_equal = False + + return models_are_equal + def 
create_unet_lora_layers(unet: nn.Module): lora_attn_procs = {} for name in unet.attn_processors.keys(): @@ -778,10 +793,11 @@ def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): if self.has_two_text_encoders: pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True) - # Just makes sure it works.. + # Just makes sure it works.. _ = pipe(**inputs, generator=torch.manual_seed(0)).images + class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): pipeline_class = StableDiffusionPipeline scheduler_cls = DDIMScheduler From 36ec7212c265124ba2c49482e0c0f55969fe0be2 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 6 Oct 2023 16:40:45 +0000 Subject: [PATCH 076/134] style --- tests/lora/test_lora_layers_peft.py | 39 ++++++++++++++++------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 43675e55256e..f33631d8ad35 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -12,35 +12,46 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import copy import os import tempfile -import unittest import time -import copy +import unittest import numpy as np import torch import torch.nn as nn import torch.nn.functional as F +from huggingface_hub.repocard import RepoCard from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer from diffusers import ( AutoencoderKL, + ControlNetModel, DDIMScheduler, DiffusionPipeline, EulerDiscreteScheduler, StableDiffusionPipeline, + StableDiffusionXLControlNetPipeline, StableDiffusionXLPipeline, UNet2DConditionModel, - StableDiffusionXLControlNetPipeline, ) from diffusers.loaders import AttnProcsLayers from diffusers.models.attention_processor import ( LoRAAttnProcessor, LoRAAttnProcessor2_0, ) -from diffusers.utils.import_utils import is_peft_available, is_accelerate_available -from diffusers.utils.testing_utils import floats_tensor, nightly, require_peft_backend, require_torch_gpu, slow, load_image, torch_device +from diffusers.utils.import_utils import is_accelerate_available, is_peft_available +from diffusers.utils.testing_utils import ( + floats_tensor, + load_image, + nightly, + require_peft_backend, + require_torch_gpu, + slow, + torch_device, +) + if is_accelerate_available(): from accelerate.utils import release_memory @@ -62,6 +73,7 @@ def state_dicts_almost_equal(sd1, sd2): return models_are_equal + def create_unet_lora_layers(unet: nn.Module): lora_attn_procs = {} for name in unet.attn_processors.keys(): @@ -793,11 +805,10 @@ def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): if self.has_two_text_encoders: pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True) - # Just makes sure it works.. + # Just makes sure it works.. 
_ = pipe(**inputs, generator=torch.manual_seed(0)).images - class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): pipeline_class = StableDiffusionPipeline scheduler_cls = DDIMScheduler @@ -1111,7 +1122,6 @@ def test_dreambooth_text_encoder_new_format(self): self.assertTrue(np.allclose(images, expected, atol=1e-4)) release_memory(pipe) - def test_a1111(self): generator = torch.Generator().manual_seed(0) @@ -1132,7 +1142,6 @@ def test_a1111(self): self.assertTrue(np.allclose(images, expected, atol=1e-3)) release_memory(pipe) - def test_lycoris(self): generator = torch.Generator().manual_seed(0) @@ -1153,7 +1162,6 @@ def test_lycoris(self): self.assertTrue(np.allclose(images, expected, atol=1e-3)) release_memory(pipe) - def test_a1111_with_model_cpu_offload(self): generator = torch.Generator().manual_seed(0) @@ -1173,7 +1181,6 @@ def test_a1111_with_model_cpu_offload(self): self.assertTrue(np.allclose(images, expected, atol=1e-3)) release_memory(pipe) - def test_a1111_with_sequential_cpu_offload(self): generator = torch.Generator().manual_seed(0) @@ -1193,7 +1200,6 @@ def test_a1111_with_sequential_cpu_offload(self): self.assertTrue(np.allclose(images, expected, atol=1e-3)) release_memory(pipe) - def test_kohya_sd_v15_with_higher_dimensions(self): generator = torch.Generator().manual_seed(0) @@ -1214,7 +1220,6 @@ def test_kohya_sd_v15_with_higher_dimensions(self): self.assertTrue(np.allclose(images, expected, atol=1e-3)) release_memory(pipe) - def test_vanilla_funetuning(self): generator = torch.Generator().manual_seed(0) @@ -1235,7 +1240,6 @@ def test_vanilla_funetuning(self): self.assertTrue(np.allclose(images, expected, atol=1e-4)) release_memory(pipe) - def test_unload_kohya_lora(self): generator = torch.manual_seed(0) prompt = "masterpiece, best quality, mountain" @@ -1402,11 +1406,11 @@ def test_sdxl_1_0_lora_fusion(self): lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - pipe.fuse_lora() + pipe.fuse_lora() # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being # silently deleted - otherwise this will CPU OOM pipe.unload_lora_weights() - + pipe.enable_model_cpu_offload() images = pipe( @@ -1428,7 +1432,7 @@ def test_sdxl_1_0_lora_unfusion(self): lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) pipe.fuse_lora() - + pipe.enable_model_cpu_offload() images = pipe( @@ -1443,7 +1447,6 @@ def test_sdxl_1_0_lora_unfusion(self): ).images images_without_fusion = images[0, -3:, -3:, -1].flatten() - self.assertFalse(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3)) release_memory(pipe) From 95d2b44fd4506e3e02ea20499bb8d1047f820d0b Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Sun, 8 Oct 2023 20:58:12 +0000 Subject: [PATCH 077/134] change import --- src/diffusers/loaders.py | 2 +- src/diffusers/models/attention.py | 3 +- src/diffusers/models/attention_processor.py | 3 +- src/diffusers/models/constants.py | 32 --------------------- src/diffusers/models/embeddings.py | 2 +- src/diffusers/models/resnet.py | 2 +- src/diffusers/models/transformer_2d.py | 2 +- src/diffusers/utils/__init__.py | 1 + src/diffusers/utils/constants.py | 18 ++++++++++++ tests/lora/test_lora_layers_old_backend.py | 2 ++ 10 files changed, 29 insertions(+), 38 deletions(-) delete mode 100644 src/diffusers/models/constants.py diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py 
index be2667fcbffa..1779707e27c2 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -27,7 +27,6 @@ from torch import nn from . import __version__ -from .models import USE_PEFT_BACKEND from .models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta from .utils import ( DIFFUSERS_CACHE, @@ -47,6 +46,7 @@ scale_lora_layers, set_adapter_layers, set_weights_and_activate_adapters, + USE_PEFT_BACKEND, ) from .utils.import_utils import BACKENDS_MAPPING diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py index 2bbeca90ae83..8a8221c883b2 100644 --- a/src/diffusers/models/attention.py +++ b/src/diffusers/models/attention.py @@ -20,10 +20,11 @@ from ..utils.torch_utils import maybe_allow_in_graph from .activations import get_activation from .attention_processor import Attention -from .constants import USE_PEFT_BACKEND + from .embeddings import CombinedTimestepLabelEmbeddings from .lora import LoRACompatibleLinear +from ..utils import USE_PEFT_BACKEND @maybe_allow_in_graph class GatedSelfAttentionDense(nn.Module): diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py index b7eeb8b03f16..45d527f3810a 100644 --- a/src/diffusers/models/attention_processor.py +++ b/src/diffusers/models/attention_processor.py @@ -21,9 +21,10 @@ from ..utils import deprecate, logging from ..utils.import_utils import is_xformers_available from ..utils.torch_utils import maybe_allow_in_graph -from .constants import USE_PEFT_BACKEND + from .lora import LoRACompatibleLinear, LoRALinearLayer +from ..utils import USE_PEFT_BACKEND logger = logging.get_logger(__name__) # pylint: disable=invalid-name diff --git a/src/diffusers/models/constants.py b/src/diffusers/models/constants.py deleted file mode 100644 index 890b47cfc21e..000000000000 --- a/src/diffusers/models/constants.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import importlib - -from packaging import version - -from ..utils.import_utils import is_peft_available - - -# Below should be `True` if the current version of `peft` and `transformers` are compatible with -# PEFT backend. Will automatically fall back to PEFT backend if the correct versions of the libraries are -# available. -# For PEFT it is has to be greater than 0.6.0 and for transformers it has to be greater than 4.33.1. 
-_required_peft_version = is_peft_available() and version.parse(
-    version.parse(importlib.metadata.version("peft")).base_version
-) > version.parse("0.5")
-_required_transformers_version = version.parse(
-    version.parse(importlib.metadata.version("transformers")).base_version
-) > version.parse("4.33")
-
-USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version
diff --git a/src/diffusers/models/embeddings.py b/src/diffusers/models/embeddings.py
index 614ce5ed40bb..c6bf06923220 100644
--- a/src/diffusers/models/embeddings.py
+++ b/src/diffusers/models/embeddings.py
@@ -19,9 +19,9 @@
 from torch import nn
 
 from .activations import get_activation
-from .constants import USE_PEFT_BACKEND
 from .lora import LoRACompatibleLinear
 
+from ..utils import USE_PEFT_BACKEND
 
 def get_timestep_embedding(
     timesteps: torch.Tensor,
diff --git a/src/diffusers/models/resnet.py b/src/diffusers/models/resnet.py
index 914a31951836..d3b1790d0534 100644
--- a/src/diffusers/models/resnet.py
+++ b/src/diffusers/models/resnet.py
@@ -23,9 +23,9 @@
 from .activations import get_activation
 from .attention import AdaGroupNorm
 from .attention_processor import SpatialNorm
-from .constants import USE_PEFT_BACKEND
 from .lora import LoRACompatibleConv, LoRACompatibleLinear
 
+from ..utils import USE_PEFT_BACKEND
 
 class Upsample1D(nn.Module):
     """A 1D upsampling layer with an optional convolution.
diff --git a/src/diffusers/models/transformer_2d.py b/src/diffusers/models/transformer_2d.py
index 35111f66e959..7157de330fc2 100644
--- a/src/diffusers/models/transformer_2d.py
+++ b/src/diffusers/models/transformer_2d.py
@@ -22,11 +22,11 @@
 from ..models.embeddings import ImagePositionalEmbeddings
 from ..utils import BaseOutput, deprecate
 from .attention import BasicTransformerBlock
-from .constants import USE_PEFT_BACKEND
 from .embeddings import PatchEmbed
 from .lora import LoRACompatibleConv, LoRACompatibleLinear
 from .modeling_utils import ModelMixin
 
+from ..utils import USE_PEFT_BACKEND
 
 @dataclass
 class Transformer2DModelOutput(BaseOutput):
diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py
index ada3eea302da..050360b0959a 100644
--- a/src/diffusers/utils/__init__.py
+++ b/src/diffusers/utils/__init__.py
@@ -30,6 +30,7 @@
     ONNX_WEIGHTS_NAME,
     SAFETENSORS_WEIGHTS_NAME,
     WEIGHTS_NAME,
+    USE_PEFT_BACKEND,
 )
 from .deprecation_utils import deprecate
 from .doc_utils import replace_example_docstring
diff --git a/src/diffusers/utils/constants.py b/src/diffusers/utils/constants.py
index 1f51f2c0497b..db9a7aea430a 100644
--- a/src/diffusers/utils/constants.py
+++ b/src/diffusers/utils/constants.py
@@ -12,6 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+import importlib
+
+from packaging import version
+
+from .import_utils import is_peft_available
 
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE, hf_cache_home
 
@@ -30,3 +35,16 @@
 DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules"
 HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules"))
 DEPRECATED_REVISION_ARGS = ["fp16", "non-ema"]
+
+# Below should be `True` if the current versions of `peft` and `transformers` are compatible with
+# the PEFT backend. The PEFT backend will automatically be used if the correct versions of the
+# libraries are available.
+# For PEFT it has to be greater than 0.6.0 and for transformers it has to be greater than 4.33.1.
+_required_peft_version = is_peft_available() and version.parse( + version.parse(importlib.metadata.version("peft")).base_version +) > version.parse("0.5") +_required_transformers_version = version.parse( + version.parse(importlib.metadata.version("transformers")).base_version +) > version.parse("4.33") + +USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version \ No newline at end of file diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index cf1d3496ac4a..e01453cb13d1 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -2173,6 +2173,8 @@ def test_sdxl_1_0_lora_unfusion(self): ).images images_without_fusion = images[0, -3:, -3:, -1].flatten() + import pdb; pdb.set_trace() + self.assertFalse(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3)) def test_sdxl_1_0_lora_unfusion_effectivity(self): From e82d83c5808f3f1597ec9a7cf79b7276f888c1cb Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 08:29:59 +0000 Subject: [PATCH 078/134] style --- src/diffusers/loaders.py | 2 +- src/diffusers/models/attention.py | 3 +-- src/diffusers/models/attention_processor.py | 4 +--- src/diffusers/models/embeddings.py | 2 +- src/diffusers/models/resnet.py | 2 +- src/diffusers/models/transformer_2d.py | 3 +-- src/diffusers/utils/__init__.py | 2 +- src/diffusers/utils/constants.py | 7 +++---- tests/lora/test_lora_layers_old_backend.py | 4 +++- 9 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 1779707e27c2..c8279669b315 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -31,6 +31,7 @@ from .utils import ( DIFFUSERS_CACHE, HF_HUB_OFFLINE, + USE_PEFT_BACKEND, _get_model_file, convert_state_dict_to_diffusers, convert_state_dict_to_peft, @@ -46,7 +47,6 @@ scale_lora_layers, set_adapter_layers, set_weights_and_activate_adapters, - USE_PEFT_BACKEND, ) from .utils.import_utils import BACKENDS_MAPPING diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py index 8a8221c883b2..e4a6537b2bac 100644 --- a/src/diffusers/models/attention.py +++ b/src/diffusers/models/attention.py @@ -17,14 +17,13 @@ import torch.nn.functional as F from torch import nn +from ..utils import USE_PEFT_BACKEND from ..utils.torch_utils import maybe_allow_in_graph from .activations import get_activation from .attention_processor import Attention - from .embeddings import CombinedTimestepLabelEmbeddings from .lora import LoRACompatibleLinear -from ..utils import USE_PEFT_BACKEND @maybe_allow_in_graph class GatedSelfAttentionDense(nn.Module): diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py index 45d527f3810a..5bb8e2fd9820 100644 --- a/src/diffusers/models/attention_processor.py +++ b/src/diffusers/models/attention_processor.py @@ -18,13 +18,11 @@ import torch.nn.functional as F from torch import nn -from ..utils import deprecate, logging +from ..utils import USE_PEFT_BACKEND, deprecate, logging from ..utils.import_utils import is_xformers_available from ..utils.torch_utils import maybe_allow_in_graph - from .lora import LoRACompatibleLinear, LoRALinearLayer -from ..utils import USE_PEFT_BACKEND logger = logging.get_logger(__name__) # pylint: disable=invalid-name diff --git a/src/diffusers/models/embeddings.py b/src/diffusers/models/embeddings.py index c6bf06923220..d3422c8f58b2 100644 --- a/src/diffusers/models/embeddings.py 
+++ b/src/diffusers/models/embeddings.py @@ -18,10 +18,10 @@ import torch from torch import nn +from ..utils import USE_PEFT_BACKEND from .activations import get_activation from .lora import LoRACompatibleLinear -from ..utils import USE_PEFT_BACKEND def get_timestep_embedding( timesteps: torch.Tensor, diff --git a/src/diffusers/models/resnet.py b/src/diffusers/models/resnet.py index d3b1790d0534..d59b32737f1f 100644 --- a/src/diffusers/models/resnet.py +++ b/src/diffusers/models/resnet.py @@ -20,12 +20,12 @@ import torch.nn as nn import torch.nn.functional as F +from ..utils import USE_PEFT_BACKEND from .activations import get_activation from .attention import AdaGroupNorm from .attention_processor import SpatialNorm from .lora import LoRACompatibleConv, LoRACompatibleLinear -from ..utils import USE_PEFT_BACKEND class Upsample1D(nn.Module): """A 1D upsampling layer with an optional convolution. diff --git a/src/diffusers/models/transformer_2d.py b/src/diffusers/models/transformer_2d.py index 7157de330fc2..7e9307b74c59 100644 --- a/src/diffusers/models/transformer_2d.py +++ b/src/diffusers/models/transformer_2d.py @@ -20,13 +20,12 @@ from ..configuration_utils import ConfigMixin, register_to_config from ..models.embeddings import ImagePositionalEmbeddings -from ..utils import BaseOutput, deprecate +from ..utils import USE_PEFT_BACKEND, BaseOutput, deprecate from .attention import BasicTransformerBlock from .embeddings import PatchEmbed from .lora import LoRACompatibleConv, LoRACompatibleLinear from .modeling_utils import ModelMixin -from ..utils import USE_PEFT_BACKEND @dataclass class Transformer2DModelOutput(BaseOutput): diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 050360b0959a..8b1ff605e4c8 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -29,8 +29,8 @@ ONNX_EXTERNAL_WEIGHTS_NAME, ONNX_WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME, - WEIGHTS_NAME, USE_PEFT_BACKEND, + WEIGHTS_NAME, ) from .deprecation_utils import deprecate from .doc_utils import replace_example_docstring diff --git a/src/diffusers/utils/constants.py b/src/diffusers/utils/constants.py index db9a7aea430a..6573972bb999 100644 --- a/src/diffusers/utils/constants.py +++ b/src/diffusers/utils/constants.py @@ -11,15 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import os import importlib +import os +from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE, hf_cache_home from packaging import version from .import_utils import is_peft_available -from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE, hf_cache_home - default_cache_path = HUGGINGFACE_HUB_CACHE @@ -47,4 +46,4 @@ version.parse(importlib.metadata.version("transformers")).base_version ) > version.parse("4.33") -USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version \ No newline at end of file +USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index e01453cb13d1..1b518a6281f1 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -2173,7 +2173,9 @@ def test_sdxl_1_0_lora_unfusion(self): ).images images_without_fusion = images[0, -3:, -3:, -1].flatten() - import pdb; pdb.set_trace() + import pdb + + pdb.set_trace() self.assertFalse(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3)) From f939e04fdd9b582fd85f90f6cb53f55a1830e0de Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 08:54:40 +0000 Subject: [PATCH 079/134] fix CI --- src/diffusers/utils/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/utils/constants.py b/src/diffusers/utils/constants.py index 6573972bb999..05ea4067b414 100644 --- a/src/diffusers/utils/constants.py +++ b/src/diffusers/utils/constants.py @@ -17,7 +17,7 @@ from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE, hf_cache_home from packaging import version -from .import_utils import is_peft_available +from .import_utils import is_peft_available, is_transformers_available default_cache_path = HUGGINGFACE_HUB_CACHE @@ -42,7 +42,7 @@ _required_peft_version = is_peft_available() and version.parse( version.parse(importlib.metadata.version("peft")).base_version ) > version.parse("0.5") -_required_transformers_version = version.parse( +_required_transformers_version = is_transformers_available() and version.parse( version.parse(importlib.metadata.version("transformers")).base_version ) > version.parse("4.33") From 10f635246a150ec7a1b514247adb9f9f27c48bf8 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 08:58:22 +0000 Subject: [PATCH 080/134] refactor slow tests one last time --- tests/lora/test_lora_layers_old_backend.py | 4 +--- tests/lora/test_lora_layers_peft.py | 26 +++++++++++++++------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index e01453cb13d1..8c3ebbc9448a 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -2258,9 +2258,7 @@ def test_sdxl_1_0_last_ben(self): self.assertTrue(np.allclose(images, expected, atol=1e-3)) def test_sdxl_1_0_fuse_unfuse_all(self): - pipe = DiffusionPipeline.from_pretrained( - "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 - ) + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16).to("cuda") text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict()) text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict()) unet_sd = copy.deepcopy(pipe.unet.state_dict()) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 
f33631d8ad35..684f49690bfb 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -1322,10 +1322,20 @@ def test_load_unload_load_kohya_lora(self): self.assertTrue(np.allclose(lora_images, lora_images_again, atol=1e-3)) release_memory(pipe) +@slow +@require_torch_gpu +class LoraSDXLIntegrationTests(unittest.TestCase): + def tearDown(self): + import gc + + gc.collect() + torch.cuda.empty_cache() + gc.collect() + def test_sdxl_0_9_lora_one(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9").to("cuda") lora_model_id = "hf-internal-testing/sdxl-0.9-daiton-lora" lora_filename = "daiton-xl-lora-test.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) @@ -1344,7 +1354,7 @@ def test_sdxl_0_9_lora_one(self): def test_sdxl_0_9_lora_two(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9").to("cuda") lora_model_id = "hf-internal-testing/sdxl-0.9-costumes-lora" lora_filename = "saijo.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) @@ -1363,7 +1373,7 @@ def test_sdxl_0_9_lora_two(self): def test_sdxl_0_9_lora_three(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9").to("cuda") lora_model_id = "hf-internal-testing/sdxl-0.9-kamepan-lora" lora_filename = "kame_sdxl_v2-000020-16rank.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) @@ -1382,7 +1392,7 @@ def test_sdxl_0_9_lora_three(self): def test_sdxl_1_0_lora(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") pipe.enable_model_cpu_offload() lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" @@ -1401,7 +1411,7 @@ def test_sdxl_1_0_lora(self): def test_sdxl_1_0_lora_fusion(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) @@ -1447,7 +1457,7 @@ def test_sdxl_1_0_lora_unfusion(self): ).images images_without_fusion = images[0, -3:, -3:, -1].flatten() - self.assertFalse(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3)) + self.assertTrue(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3)) release_memory(pipe) def test_sdxl_1_0_lora_unfusion_effectivity(self): @@ -1526,7 +1536,7 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): def test_sdxl_1_0_last_ben(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") 
pipe.enable_model_cpu_offload() lora_model_id = "TheLastBen/Papercut_SDXL" lora_filename = "papercut.safetensors" @@ -1576,7 +1586,7 @@ def test_sdxl_1_0_fuse_unfuse_all(self): def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") pipe.enable_sequential_cpu_offload() lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" From 22452b7f2e650438d803302aae0378eae57020e2 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 08:59:16 +0000 Subject: [PATCH 081/134] style --- tests/lora/test_lora_layers_old_backend.py | 4 +++- tests/lora/test_lora_layers_peft.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index d6a9e1ef6887..a7b23959ade0 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -2260,7 +2260,9 @@ def test_sdxl_1_0_last_ben(self): self.assertTrue(np.allclose(images, expected, atol=1e-3)) def test_sdxl_1_0_fuse_unfuse_all(self): - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16).to("cuda") + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ).to("cuda") text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict()) text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict()) unet_sd = copy.deepcopy(pipe.unet.state_dict()) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 684f49690bfb..430e845a60e7 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -1322,6 +1322,7 @@ def test_load_unload_load_kohya_lora(self): self.assertTrue(np.allclose(lora_images, lora_images_again, atol=1e-3)) release_memory(pipe) + @slow @require_torch_gpu class LoraSDXLIntegrationTests(unittest.TestCase): From 48ae256f7d42cef8bf1f76ebf3da72248a3221d7 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 09:57:51 +0000 Subject: [PATCH 082/134] oops --- tests/lora/test_lora_layers_old_backend.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index d6a9e1ef6887..82a171fcf618 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -2173,10 +2173,6 @@ def test_sdxl_1_0_lora_unfusion(self): ).images images_without_fusion = images[0, -3:, -3:, -1].flatten() - import pdb - - pdb.set_trace() - self.assertFalse(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3)) def test_sdxl_1_0_lora_unfusion_effectivity(self): From 06db84d4318fca37ba738a7c72dbefd73ab253c0 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 09:58:30 +0000 Subject: [PATCH 083/134] oops --- tests/lora/test_lora_layers_old_backend.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index a7b23959ade0..6196d433c2d6 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -2173,10 +2173,6 @@ def test_sdxl_1_0_lora_unfusion(self): ).images 
images_without_fusion = images[0, -3:, -3:, -1].flatten() - import pdb - - pdb.set_trace() - self.assertFalse(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3)) def test_sdxl_1_0_lora_unfusion_effectivity(self): From 0723b550b69ba719fe5eec30d77acd60c2434097 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 09:59:27 +0000 Subject: [PATCH 084/134] oops --- tests/lora/test_lora_layers_old_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 6196d433c2d6..82f29a8510b2 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -2264,7 +2264,7 @@ def test_sdxl_1_0_fuse_unfuse_all(self): unet_sd = copy.deepcopy(pipe.unet.state_dict()) pipe.load_lora_weights( - "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.bfloat16 + "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.float16 ) fused_te_state_dict = pipe.text_encoder.state_dict() From 44ae0a9b60bbd3a278dcc29cac201592123d06c7 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 10:50:09 +0000 Subject: [PATCH 085/134] final tweak tests --- tests/lora/test_lora_layers_old_backend.py | 7 ++++--- tests/lora/test_lora_layers_peft.py | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 82f29a8510b2..29099129d628 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1825,6 +1825,7 @@ def test_lora_xformers_on_off(self): @slow +@deprecate_after_peft_backend @require_torch_gpu class LoraIntegrationTests(unittest.TestCase): def test_dreambooth_old_format(self): @@ -2257,14 +2258,14 @@ def test_sdxl_1_0_last_ben(self): def test_sdxl_1_0_fuse_unfuse_all(self): pipe = DiffusionPipeline.from_pretrained( - "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 - ).to("cuda") + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 + ) text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict()) text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict()) unet_sd = copy.deepcopy(pipe.unet.state_dict()) pipe.load_lora_weights( - "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.float16 + "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.bfloat16 ) fused_te_state_dict = pipe.text_encoder.state_dict() diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 430e845a60e7..8e7df3de6efb 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -1336,7 +1336,7 @@ def tearDown(self): def test_sdxl_0_9_lora_one(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9").to("cuda") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") lora_model_id = "hf-internal-testing/sdxl-0.9-daiton-lora" lora_filename = "daiton-xl-lora-test.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) @@ -1355,7 +1355,7 @@ def test_sdxl_0_9_lora_one(self): def test_sdxl_0_9_lora_two(self): generator = torch.Generator().manual_seed(0) - pipe = 
DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9").to("cuda") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") lora_model_id = "hf-internal-testing/sdxl-0.9-costumes-lora" lora_filename = "saijo.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) @@ -1374,7 +1374,7 @@ def test_sdxl_0_9_lora_two(self): def test_sdxl_0_9_lora_three(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9").to("cuda") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9") lora_model_id = "hf-internal-testing/sdxl-0.9-kamepan-lora" lora_filename = "kame_sdxl_v2-000020-16rank.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) @@ -1393,7 +1393,7 @@ def test_sdxl_0_9_lora_three(self): def test_sdxl_1_0_lora(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.enable_model_cpu_offload() lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" @@ -1412,7 +1412,7 @@ def test_sdxl_1_0_lora(self): def test_sdxl_1_0_lora_fusion(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) @@ -1438,7 +1438,7 @@ def test_sdxl_1_0_lora_fusion(self): def test_sdxl_1_0_lora_unfusion(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) @@ -1462,7 +1462,7 @@ def test_sdxl_1_0_lora_unfusion(self): release_memory(pipe) def test_sdxl_1_0_lora_unfusion_effectivity(self): - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.enable_model_cpu_offload() generator = torch.Generator().manual_seed(0) @@ -1494,12 +1494,13 @@ def test_sdxl_1_0_lora_unfusion_effectivity(self): self.assertTrue(np.allclose(original_image_slice, images_without_fusion_slice, atol=1e-3)) release_memory(pipe) + @unittest.skip("This lead to CPU OOM") def test_sdxl_1_0_lora_fusion_efficiency(self): generator = torch.Generator().manual_seed(0) lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) pipe.enable_model_cpu_offload() @@ -1587,7 +1588,7 @@ def test_sdxl_1_0_fuse_unfuse_all(self): def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self): generator = 
torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.enable_sequential_cpu_offload() lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" From d64dc6fea8aa548328aac5423e4c46671c727d16 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Mon, 9 Oct 2023 15:20:01 +0200 Subject: [PATCH 086/134] Apply suggestions from code review Co-authored-by: Sayak Paul --- src/diffusers/loaders.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index c8279669b315..4801a9a47931 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -712,10 +712,10 @@ def set_adapters( adapter_names (`List[str]` or `str`): The names of the adapters to use. weights (`List[float]`, *optional*): - The weights to use for the unet. If `None`, the weights are set to `1.0` for all the adapters. + The weights for the adapter(s) to be used with unet. If `None`, the weights are set to `1.0` for all the adapters. """ if not self.use_peft_backend: - raise ValueError("PEFT backend is required for this method.") + raise ValueError("PEFT backend is required for `set_adapters()`.") def process_weights(adapter_names, weights): if weights is None: From f6d6e5d70984dee5d3b765506cbf5105a9051ef9 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Mon, 9 Oct 2023 15:21:21 +0200 Subject: [PATCH 087/134] Update src/diffusers/loaders.py Co-authored-by: Sayak Paul --- src/diffusers/loaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 4801a9a47931..1aad5847796b 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -725,7 +725,7 @@ def process_weights(adapter_names, weights): if len(adapter_names) != len(weights): raise ValueError( - f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(weights)}" + f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}." ) return weights From 5394d370e34192dfe8181a7530098f02a128803c Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 13:23:20 +0000 Subject: [PATCH 088/134] comments --- src/diffusers/loaders.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 1aad5847796b..acbad1a07f51 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -531,8 +531,6 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) - # There is no need to remove the hooks as they have been already attached in case LoRA - # if not self.use_peft_backend: logger.info( "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again." 
) @@ -546,7 +544,6 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict for target_module, lora_layer in lora_layers_list: target_module.set_lora_layer(lora_layer) - # if not already_offloaded: self.to(dtype=self.dtype, device=self.device) # Offload back. @@ -712,7 +709,8 @@ def set_adapters( adapter_names (`List[str]` or `str`): The names of the adapters to use. weights (`List[float]`, *optional*): - The weights for the adapter(s) to be used with unet. If `None`, the weights are set to `1.0` for all the adapters. + The weights for the adapter(s) to be used with unet. If `None`, the weights are set to `1.0` for all + the adapters. """ if not self.use_peft_backend: raise ValueError("PEFT backend is required for `set_adapters()`.") From 32043aa2a5f94a4905de6095e6556ef3152b0aa3 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Mon, 9 Oct 2023 15:23:51 +0200 Subject: [PATCH 089/134] Apply suggestions from code review Co-authored-by: Sayak Paul --- src/diffusers/loaders.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index acbad1a07f51..7a64d76f0ff5 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1518,10 +1518,10 @@ def _maybe_map_sgm_blocks_to_diffusers(cls, state_dict, unet_config, delimiter=" @classmethod def _optionally_disable_offloading(cls, _pipeline): """ - Optionnally removes offloading in case the pipeline has been already sequentially offloaded to CPU. + Optionally removes offloading in case the pipeline has been already sequentially offloaded to CPU. Args: - _pipeline (`Pipeline`): + _pipeline (`DiffusionPipeline`): The pipeline to disable offloading for. Returns: From 599f556f12c05aeedf4fe88798b0c35f4e2b518a Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 13:24:27 +0000 Subject: [PATCH 090/134] remove comments --- src/diffusers/loaders.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 7a64d76f0ff5..f6c13efe2c22 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1538,8 +1538,6 @@ def _optionally_disable_offloading(cls, _pipeline): is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) - # There is no need to remove the hooks as they have been already attached in case LoRA - # if not self.use_peft_backend: logger.info( "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again." ) From 6faee8081409df2f17841c53b35fd795e0024487 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 13:28:56 +0000 Subject: [PATCH 091/134] more comments --- src/diffusers/models/modeling_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 3cb0cc6d88aa..cf3be9b18231 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -404,7 +404,8 @@ def disable_adapters(self) -> None: def enable_adapters(self) -> None: """ - Enable adapters that are attached to the model. The model will use `self.active_adapter()` + Enable adapters that are attached to the model. 
The model will use `self.active_adapters()` to retrieve the + list of adapters to enable. If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT official documentation: https://huggingface.co/docs/peft From 1c9445281fedfe72eb506b76d9acc676aee1e30c Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 13:38:29 +0000 Subject: [PATCH 092/134] try --- tests/lora/test_lora_layers_peft.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 8e7df3de6efb..6a8d4e459201 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -775,7 +775,6 @@ def test_simple_inference_with_text_unet_multi_adapter(self): "output with no lora and output with lora disabled should give same results", ) - @unittest.skip("This is failing for now - need to investigate") def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): """ Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights @@ -799,11 +798,11 @@ def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" ) - pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) - pipe.text_encoder = torch.compile(pipe.text_encoder, mode="reduce-overhead", fullgraph=True) + pipe.unet = torch.compile(pipe.unet) + pipe.text_encoder = torch.compile(pipe.text_encoder) if self.has_two_text_encoders: - pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True) + pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2) # Just makes sure it works.. _ = pipe(**inputs, generator=torch.manual_seed(0)).images From a14779ea5b420e6a7cb2a765f96889edafa5cf13 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 14:06:04 +0000 Subject: [PATCH 093/134] revert --- tests/lora/test_lora_layers_peft.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 6a8d4e459201..8e7df3de6efb 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -775,6 +775,7 @@ def test_simple_inference_with_text_unet_multi_adapter(self): "output with no lora and output with lora disabled should give same results", ) + @unittest.skip("This is failing for now - need to investigate") def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): """ Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights @@ -798,11 +799,11 @@ def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" ) - pipe.unet = torch.compile(pipe.unet) - pipe.text_encoder = torch.compile(pipe.text_encoder) + pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) + pipe.text_encoder = torch.compile(pipe.text_encoder, mode="reduce-overhead", fullgraph=True) if self.has_two_text_encoders: - pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2) + pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True) # Just makes sure it works.. 
_ = pipe(**inputs, generator=torch.manual_seed(0)).images From cad5a4b0ef8bcfc4abdcb76cdc5175812a247146 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 9 Oct 2023 16:36:07 +0000 Subject: [PATCH 094/134] add `safe_merge` tests --- tests/lora/test_lora_layers_peft.py | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 8e7df3de6efb..4b0ef97d35ab 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -775,6 +775,37 @@ def test_simple_inference_with_text_unet_multi_adapter(self): "output with no lora and output with lora disabled should give same results", ) + def test_lora_fuse_nan(self): + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + + pipe.unet.add_adapter(unet_lora_config, "adapter-1") + + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + # corrupt one LoRA weight with `inf` values + with torch.no_grad(): + pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_A["adapter-1"].weight += float( + "inf" + ) + + # with `safe_fusing=True` we should see an Error + with self.assertRaises(ValueError): + pipe.fuse_lora(safe_fusing=True) + + # without we should not see an error, but every image will be black + pipe.fuse_lora(safe_fusing=False) + + out = pipe("test", num_inference_steps=2, output_type="np").images + + self.assertTrue(np.isnan(out).all()) + @unittest.skip("This is failing for now - need to investigate") def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): """ From 3708ed9cb2cefd362ecdc12739520ec5b1894a71 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 10 Oct 2023 07:36:06 +0000 Subject: [PATCH 095/134] add comment --- src/diffusers/loaders.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index dc147e393124..e5d46d765bbb 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1610,6 +1610,8 @@ def load_lora_into_unet( state_dict = convert_unet_state_dict_to_peft(state_dict) if network_alphas is not None: + # The alphas state dict have the same structure as Unet, thus we convert it to peft format using + # `convert_unet_state_dict_to_peft` method. 
network_alphas = convert_unet_state_dict_to_peft(network_alphas) rank = {} From 323612bc7aa21ae0d0825dd2490845932367521e Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 10 Oct 2023 07:46:39 +0000 Subject: [PATCH 096/134] style, comments and run tests in fp16 --- src/diffusers/loaders.py | 2 +- tests/lora/test_lora_layers_peft.py | 38 +++++++++++++++++------------ 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index e5d46d765bbb..43d6d8ca02eb 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1610,7 +1610,7 @@ def load_lora_into_unet( state_dict = convert_unet_state_dict_to_peft(state_dict) if network_alphas is not None: - # The alphas state dict have the same structure as Unet, thus we convert it to peft format using + # The alphas state dict have the same structure as Unet, thus we convert it to peft format using # `convert_unet_state_dict_to_peft` method. network_alphas = convert_unet_state_dict_to_peft(network_alphas) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 4b0ef97d35ab..818bf5e331a9 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -876,7 +876,7 @@ def test_integration_move_lora_cpu(self): path = "runwayml/stable-diffusion-v1-5" lora_id = "takuma104/lora-test-text-encoder-lora-target" - pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32) + pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16) pipe.load_lora_weights(lora_id, adapter_name="adapter-1") pipe.load_lora_weights(lora_id, adapter_name="adapter-2") pipe = pipe.to("cuda") @@ -891,20 +891,20 @@ def test_integration_move_lora_cpu(self): "Lora not correctly set in text encoder", ) + # We will offload the first adapter in CPU and check if the offloading + # has been performed correctly pipe.set_lora_device(["adapter-1"], "cpu") for name, module in pipe.unet.named_modules(): if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)): self.assertTrue(module.weight.device == torch.device("cpu")) elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)): - # import pdb; pdb.set_trace() self.assertTrue(module.weight.device != torch.device("cpu")) for name, module in pipe.text_encoder.named_modules(): if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)): self.assertTrue(module.weight.device == torch.device("cpu")) elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)): - # import pdb; pdb.set_trace() self.assertTrue(module.weight.device != torch.device("cpu")) pipe.set_lora_device(["adapter-1"], 0) @@ -1493,7 +1493,9 @@ def test_sdxl_1_0_lora_unfusion(self): release_memory(pipe) def test_sdxl_1_0_lora_unfusion_effectivity(self): - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 + ) pipe.enable_model_cpu_offload() generator = torch.Generator().manual_seed(0) @@ -1504,7 +1506,7 @@ def test_sdxl_1_0_lora_unfusion_effectivity(self): lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16) pipe.fuse_lora() # We need to unload the lora weights since 
in the previous API `fuse_lora` led to lora weights being # silently deleted - otherwise this will CPU OOM @@ -1531,8 +1533,10 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 + ) + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16) pipe.enable_model_cpu_offload() start_time = time.time() @@ -1545,8 +1549,10 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): del pipe - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 + ) + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16) pipe.fuse_lora() # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being # silently deleted - otherwise this will CPU OOM @@ -1569,7 +1575,7 @@ def test_sdxl_1_0_lora_fusion_efficiency(self): def test_sdxl_1_0_last_ben(self): generator = torch.Generator().manual_seed(0) - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.enable_model_cpu_offload() lora_model_id = "TheLastBen/Papercut_SDXL" lora_filename = "papercut.safetensors" @@ -1615,6 +1621,7 @@ def test_sdxl_1_0_fuse_unfuse_all(self): assert not state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict()) assert not state_dicts_almost_equal(unet_sd, pipe.unet.state_dict()) release_memory(pipe) + del unet_sd, text_encoder_1_sd, text_encoder_2_sd def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self): generator = torch.Generator().manual_seed(0) @@ -1662,10 +1669,11 @@ def test_canny_lora(self): @nightly def test_sequential_fuse_unfuse(self): - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16) # 1. round - pipe.load_lora_weights("Pclanglais/TintinIA") + pipe.load_lora_weights("Pclanglais/TintinIA", torch_dtype=torch.float16) + pipe.to("cuda") pipe.fuse_lora() generator = torch.Generator().manual_seed(0) @@ -1677,17 +1685,17 @@ def test_sequential_fuse_unfuse(self): pipe.unfuse_lora() # 2. round - pipe.load_lora_weights("ProomptEngineer/pe-balloon-diffusion-style") + pipe.load_lora_weights("ProomptEngineer/pe-balloon-diffusion-style", torch_dtype=torch.float16) pipe.fuse_lora() pipe.unfuse_lora() # 3. round - pipe.load_lora_weights("ostris/crayon_style_lora_sdxl") + pipe.load_lora_weights("ostris/crayon_style_lora_sdxl", torch_dtype=torch.float16) pipe.fuse_lora() pipe.unfuse_lora() # 4. 
back to 1st round
-        pipe.load_lora_weights("Pclanglais/TintinIA")
+        pipe.load_lora_weights("Pclanglais/TintinIA", torch_dtype=torch.float16)
         pipe.fuse_lora()
 
         generator = torch.Generator().manual_seed(0)

From 64e2d87fa266dd0e69f580a5398fceaf1d6ce1c7 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Tue, 10 Oct 2023 08:01:44 +0000
Subject: [PATCH 097/134] add warnings

---
 src/diffusers/loaders.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index 43d6d8ca02eb..19d037679a88 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import os
 import re
+import warnings
 from collections import defaultdict
 from contextlib import nullcontext
 from io import BytesIO
@@ -2237,6 +2238,12 @@ def unload_lora_weights(self):
         ```
         """
         if not self.use_peft_backend:
+            if version.parse(__version__) > version.parse("0.23"):
+                warnings.warn(
+                    "You are using `unload_lora_weights` to disable and unload lora weights. If you want to iteratively enable and disable adapter weights, you can use `pipe.enable_lora()` or `pipe.disable_lora()` after"
+                    " installing the latest version of PEFT."
+                )
+
             for _, module in self.unet.named_modules():
                 if hasattr(module, "set_lora_layer"):
                     module.set_lora_layer(None)

From db0c3dc8c1c33f41f9b783802575421778c64e0a Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Tue, 10 Oct 2023 08:07:15 +0000
Subject: [PATCH 098/134] fix doc test

---
 src/diffusers/models/__init__.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/diffusers/models/__init__.py b/src/diffusers/models/__init__.py
index 5aa2a0a6c16e..a5d0066d5c40 100644
--- a/src/diffusers/models/__init__.py
+++ b/src/diffusers/models/__init__.py
@@ -17,7 +17,7 @@
 from ..utils import DIFFUSERS_SLOW_IMPORT, _LazyModule, is_flax_available, is_torch_available
 
 
-_import_structure = {"constants": ["USE_PEFT_BACKEND"]}
+_import_structure = {}
 
 if is_torch_available():
     _import_structure["adapter"] = ["MultiAdapter", "T2IAdapter"]
@@ -44,8 +44,6 @@
 
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
-    from .constants import USE_PEFT_BACKEND
-
     if is_torch_available():
         from .adapter import MultiAdapter, T2IAdapter
         from .autoencoder_asym_kl import AsymmetricAutoencoderKL

From 6172b6442d3f4f91287c4ebc36b30544dda0aea3 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Tue, 10 Oct 2023 12:37:50 +0000
Subject: [PATCH 099/134] replace with `adapter_weights`

---
 src/diffusers/loaders.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index 19d037679a88..e068e2b6c37c 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -2462,18 +2462,16 @@ def enable_lora_for_text_encoder(self, text_encoder: Optional[PreTrainedModel] =
     def set_adapters(
         self,
         adapter_names: Union[List[str], str],
-        unet_weights: List[float] = None,
-        te_weights: List[float] = None,
-        te2_weights: List[float] = None,
+        adapter_weights: List[float] = None,
     ):
         # Handle the UNET
-        self.unet.set_adapters(adapter_names, unet_weights)
+        self.unet.set_adapters(adapter_names, adapter_weights)
 
         # Handle the Text Encoder
         if hasattr(self, "text_encoder"):
-            self.set_adapters_for_text_encoder(adapter_names, self.text_encoder, te_weights)
+            self.set_adapters_for_text_encoder(adapter_names, self.text_encoder, adapter_weights)
 
         if hasattr(self, "text_encoder_2"):
-            self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2, te2_weights)
+            self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2, adapter_weights)
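For orientation, patches 094-099 above and patch 100 below converge on the user-facing
multi-adapter API of the PEFT backend. The sketch below shows how the pieces are meant to
compose once this series lands. It is illustrative only: the two LoRA repositories and the
adapter names are hypothetical placeholders, and it assumes an environment where
`USE_PEFT_BACKEND` is `True` (peft > 0.5 and transformers > 4.33 installed).

    import torch
    from diffusers import DiffusionPipeline

    pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    ).to("cuda")

    # Load two LoRAs under distinct adapter names (both repo ids are made up).
    pipe.load_lora_weights("some-user/pixel-art-lora", adapter_name="pixel")
    pipe.load_lora_weights("some-user/toy-face-lora", adapter_name="toy")

    # Patch 099: a single `adapter_weights` list now covers the UNet and both
    # text encoders, replacing the separate unet/te/te2 weight arguments.
    pipe.set_adapters(["pixel", "toy"], adapter_weights=[0.5, 1.0])

    # Patch 100: query which adapters are currently active on the UNet.
    assert pipe.get_active_adapters() == ["pixel", "toy"]

    # Patch 094: `safe_fusing=True` raises a ValueError if the merged weights
    # contain NaN/inf values, instead of silently producing black images.
    pipe.fuse_lora(safe_fusing=True)
    image = pipe("masterpiece, best quality, mountain", num_inference_steps=30).images[0]
    pipe.unfuse_lora()

    # Patch 096: individual adapters can be moved off the GPU to reclaim memory.
    pipe.set_lora_device(["pixel"], "cpu")

Routing everything through named PEFT adapters is also what makes `fuse_lora`/`unfuse_lora`
lossless here: the previous API silently deleted the LoRA weights on fusion, and because the
PEFT backend keeps them around for unfusing, the slow tests above explicitly call
`unload_lora_weights()` after fusing to avoid CPU OOM.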
self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2, adapter_weights) def disable_lora(self): if not self.use_peft_backend: From 74d80a9365d002094bef5155e2bf8e9218764e72 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 10 Oct 2023 15:07:56 +0000 Subject: [PATCH 100/134] add `get_active_adapters()` --- src/diffusers/loaders.py | 20 ++++++++++++++++++++ tests/lora/test_lora_layers_peft.py | 28 +++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index e068e2b6c37c..46de1b27e7e4 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -2499,6 +2499,26 @@ def enable_lora(self): if hasattr(self, "text_encoder_2"): self.enable_lora_for_text_encoder(self.text_encoder_2) + def get_active_adapters(self) -> Optional[List[str]]: + """ + Gets the list of the current active adapters + """ + if not self.use_peft_backend: + raise ValueError( + "PEFT backend is required for this method. Please install the latest version of PEFT `pip install -U peft`" + ) + + from peft.tuners.tuners_utils import BaseTunerLayer + + active_adapters = None + + for module in self.unet.modules(): + if isinstance(module, BaseTunerLayer): + active_adapters = module.active_adapters + break + + return active_adapters + def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None: """ Sets the LoRAs that are listed in `adapter_names` into a target device. Usefull for offloading the LoRA in CPU diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 818bf5e331a9..d41fd515a029 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -806,6 +806,32 @@ def test_lora_fuse_nan(self): self.assertTrue(np.isnan(out).all()) + def test_get_adapters(self): + """ + Tests a simple usecase where we attach multiple adapters and check if the results + are the expected results + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + pipe.unet.add_adapter(unet_lora_config, "adapter-1") + + adapter_names = pipe.get_active_adapters() + self.assertListEqual(adapter_names, ["adapter-1"]) + + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + pipe.unet.add_adapter(unet_lora_config, "adapter-2") + + adapter_names = pipe.get_active_adapters() + self.assertListEqual(adapter_names, ["adapter-2"]) + + pipe.set_adapters(["adapter-1", "adapter-2"]) + self.assertListEqual(pipe.get_active_adapters(), ["adapter-1", "adapter-2"]) + @unittest.skip("This is failing for now - need to investigate") def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): """ @@ -1035,7 +1061,7 @@ def test_integration_logits_multi_adapter(self): self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) # multi-adapter inference - pipe.set_adapters(["pixel", "toy"], unet_weights=[0.5, 1.0]) + pipe.set_adapters(["pixel", "toy"], adapter_weights=[0.5, 1.0]) images = pipe( prompt, num_inference_steps=30, From cb588ae1d88d9536e40fe72439ff5957b8c2e607 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 10 Oct 2023 17:02:09 +0000 Subject: [PATCH 101/134] expose `get_list_adapters` method --- src/diffusers/loaders.py 
| 22 ++++++++++++++++++ tests/lora/test_lora_layers_peft.py | 35 +++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 46de1b27e7e4..e5b1cbac763d 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -2519,6 +2519,28 @@ def get_active_adapters(self) -> Optional[List[str]]: return active_adapters + def get_list_adapters(self) -> Dict[str, List[str]]: + """ + Gets the current list of all available adapters in the pipeline. + """ + if not self.use_peft_backend: + raise ValueError( + "PEFT backend is required for this method. Please install the latest version of PEFT `pip install -U peft`" + ) + + set_adapters = {} + + if hasattr(self, "text_encoder") and hasattr(self.text_encoder, "peft_config"): + set_adapters["text_encoder"] = list(self.text_encoder.peft_config.keys()) + + if hasattr(self, "text_encoder_2") and hasattr(self.text_encoder_2, "peft_config"): + set_adapters["text_encoder_2"] = list(self.text_encoder_2.peft_config.keys()) + + if hasattr(self, "unet") and hasattr(self.unet, "peft_config"): + set_adapters["unet"] = list(self.unet.peft_config.keys()) + + return set_adapters + def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None: """ Sets the LoRAs that are listed in `adapter_names` into a target device. Usefull for offloading the LoRA in CPU diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index d41fd515a029..cccd44ebf406 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -832,6 +832,41 @@ def test_get_adapters(self): pipe.set_adapters(["adapter-1", "adapter-2"]) self.assertListEqual(pipe.get_active_adapters(), ["adapter-1", "adapter-2"]) + def test_get_list_adapters(self): + """ + Tests a simple usecase where we attach multiple adapters and check if the results + are the expected results + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + pipe.unet.add_adapter(unet_lora_config, "adapter-1") + + adapter_names = pipe.get_list_adapters() + self.assertDictEqual(adapter_names, {"text_encoder": ["adapter-1"], "unet": ["adapter-1"]}) + + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + pipe.unet.add_adapter(unet_lora_config, "adapter-2") + + adapter_names = pipe.get_list_adapters() + self.assertDictEqual( + adapter_names, {"text_encoder": ["adapter-1", "adapter-2"], "unet": ["adapter-1", "adapter-2"]} + ) + + pipe.set_adapters(["adapter-1", "adapter-2"]) + self.assertDictEqual( + pipe.get_list_adapters(), {"unet": ["adapter-1", "adapter-2"], "text_encoder": ["adapter-1", "adapter-2"]} + ) + + pipe.unet.add_adapter(unet_lora_config, "adapter-3") + self.assertDictEqual( + pipe.get_list_adapters(), + {"unet": ["adapter-1", "adapter-2", "adapter-3"], "text_encoder": ["adapter-1", "adapter-2"]}, + ) + @unittest.skip("This is failing for now - need to investigate") def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self): """ From b419b5239c4df3ebe860d59a3a7ac253e608d26e Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 10 Oct 2023 17:04:17 +0000 Subject: [PATCH 102/134] better error message --- src/diffusers/loaders.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/diffusers/loaders.py 
b/src/diffusers/loaders.py index e5b1cbac763d..5b1ccffde94c 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1631,6 +1631,11 @@ def load_lora_into_unet( # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) + if hasattr(unet, "peft_config") and adapter_name in list(unet.peft_config.keys()): + raise ValueError( + f"Adapter name {adapter_name} already in use in the Unet - please select a new adapter name." + ) + inject_adapter_in_model(lora_config, unet, adapter_name=adapter_name) incompatible_keys = set_peft_model_state_dict(unet, state_dict, adapter_name) @@ -1768,6 +1773,7 @@ def load_lora_into_text_encoder( is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) # inject LoRA layers and load the state dict + # in transformers we automatically check whether the adapter name is already in use or not text_encoder.load_adapter( adapter_name=adapter_name, adapter_state_dict=text_encoder_lora_state_dict, From 498dc174bd40e4d536bbff59a5d548f9005d9a72 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Tue, 10 Oct 2023 19:06:15 +0200 Subject: [PATCH 103/134] Apply suggestions from code review Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- src/diffusers/loaders.py | 8 ++++---- src/diffusers/models/modeling_utils.py | 17 ++++++++--------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 5b1ccffde94c..9aa3772cde22 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -719,7 +719,7 @@ def set_adapters( adapter_names (`List[str]` or `str`): The names of the adapters to use. weights (`List[float]`, *optional*): - The weights for the adapter(s) to be used with unet. If `None`, the weights are set to `1.0` for all + The adapter(s) weights to use with the UNet. If `None`, the weights are set to `1.0` for all the adapters. """ if not self.use_peft_backend: @@ -2549,10 +2549,10 @@ def get_list_adapters(self) -> Dict[str, List[str]]: def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None: """ - Sets the LoRAs that are listed in `adapter_names` into a target device. Usefull for offloading the LoRA in CPU - in case one wants to load multiple adapters and free some GPU memory. + Sets the LoRAs listed in `adapter_names` into a target device. Useful for offloading the LoRA in the CPU + in case you want to load multiple adapters and free some GPU memory. - Note this is specific to LoRA and needs some tweaking for new adapter architectures (e.g. IA3, AdaLora..) + This is specific to LoRA and needs some tweaking for new adapter architectures (IA3, AdaLora). Args: adapter_names (`List[str]`): diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index cf3be9b18231..f875278bf80e 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -297,17 +297,16 @@ def disable_xformers_memory_efficient_attention(self): def add_adapter(self, adapter_config, adapter_name: str = "default") -> None: r""" - Adds a fresh new adapter to the current model for training purpose. If no adapter name is passed, a default - name is assigned to the adapter to follow the convention of PEFT library (in PEFT we use "default" as the + Adds a new adapter to the current model for training. 
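For orientation, the adapter-management API that patches 102 and 103 document can be exercised end to end roughly as follows. This is a sketch against the PEFT backend, not part of the patch itself: the checkpoint, rank, target modules, and adapter name are illustrative placeholders, and `LoraConfig` comes from the `peft` package.

```python
# Sketch of the add_adapter / set_adapter / disable_adapters flow documented
# above; rank, target modules, and adapter names are illustrative.
from diffusers import UNet2DConditionModel
from peft import LoraConfig

unet = UNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="unet")

lora_config = LoraConfig(r=4, lora_alpha=4, target_modules=["to_q", "to_k", "to_v", "to_out.0"])

# Attach the adapter under an explicit name ("default" if none is given).
unet.add_adapter(lora_config, adapter_name="my-adapter")

# Route the forward pass through this adapter only.
unet.set_adapter("my-adapter")

# Fall back to the base weights without deleting the adapter.
unet.disable_adapters()
```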
If no adapter name is passed, a default + name is assigned to the adapter to follow the convention of the PEFT library (in PEFT we use `"default"` as the default adapter name). - If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT - official documentation: https://huggingface.co/docs/peft + If you are not familiar with adapters and PEFT methods, we invite you to read more about them in the PEFT [documentation](https://huggingface.co/docs/peft). Args: - adapter_config (`~peft.PeftConfig`): - The configuration of the adapter to add, supported adapters are non-prefix tuning and adaption prompts - methods + adapter_config (`[~peft.PeftConfig]`): + The configuration of the adapter to add; supported adapters are non-prefix tuning and adaption prompt + methods. adapter_name (`str`, *optional*, defaults to `"default"`): The name of the adapter to add. If no name is passed, a default name is assigned to the adapter. """ @@ -333,7 +332,7 @@ def add_adapter(self, adapter_config, adapter_name: str = "default") -> None: def set_adapter(self, adapter_name: Union[str, List[str]]) -> None: """ - Sets a specific adapter by forcing the model to use a that adapter and disable the other adapters. + Sets a specific adapter by forcing the model to only use that adapter and disables the other adapters. If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT official documentation: https://huggingface.co/docs/peft @@ -382,7 +381,7 @@ def set_adapter(self, adapter_name: Union[str, List[str]]) -> None: def disable_adapters(self) -> None: r""" - Disable all adapters that are attached to the model. This leads to inferring with the base model only. + Disable all adapters attached to the model and fallback to inference with the base model only. If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT official documentation: https://huggingface.co/docs/peft From 02d17b38c89cd3ab9bcecd29a09c24b41d986bd9 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 10 Oct 2023 17:08:53 +0000 Subject: [PATCH 104/134] style --- src/diffusers/loaders.py | 8 ++++---- src/diffusers/models/modeling_utils.py | 9 +++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 9aa3772cde22..9f136d88d0a5 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -719,8 +719,8 @@ def set_adapters( adapter_names (`List[str]` or `str`): The names of the adapters to use. weights (`List[float]`, *optional*): - The adapter(s) weights to use with the UNet. If `None`, the weights are set to `1.0` for all - the adapters. + The adapter(s) weights to use with the UNet. If `None`, the weights are set to `1.0` for all the + adapters. """ if not self.use_peft_backend: raise ValueError("PEFT backend is required for `set_adapters()`.") @@ -2549,8 +2549,8 @@ def get_list_adapters(self) -> Dict[str, List[str]]: def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None: """ - Sets the LoRAs listed in `adapter_names` into a target device. Useful for offloading the LoRA in the CPU - in case you want to load multiple adapters and free some GPU memory. + Sets the LoRAs listed in `adapter_names` into a target device. Useful for offloading the LoRA in the CPU in + case you want to load multiple adapters and free some GPU memory. 
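Concretely, the offloading pattern this docstring describes looks like the sketch below; the LoRA repositories and adapter names are illustrative (the "toy" pair also appears in a docstring example added later in this series).

```python
# Park one adapter on the CPU while keeping the active one on the GPU.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")

pipe.set_lora_device(["toy"], "cuda")  # the adapter we are about to use
pipe.set_lora_device(["pixel"], "cpu")  # free GPU memory until it is needed again
```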
This is specific to LoRA and needs some tweaking for new adapter architectures (IA3, AdaLora). diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index f875278bf80e..c889469030ba 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -297,11 +297,12 @@ def disable_xformers_memory_efficient_attention(self): def add_adapter(self, adapter_config, adapter_name: str = "default") -> None: r""" - Adds a new adapter to the current model for training. If no adapter name is passed, a default - name is assigned to the adapter to follow the convention of the PEFT library (in PEFT we use `"default"` as the - default adapter name). + Adds a new adapter to the current model for training. If no adapter name is passed, a default name is assigned + to the adapter to follow the convention of the PEFT library (in PEFT we use `"default"` as the default adapter + name). - If you are not familiar with adapters and PEFT methods, we invite you to read more about them in the PEFT [documentation](https://huggingface.co/docs/peft). + If you are not familiar with adapters and PEFT methods, we invite you to read more about them in the PEFT + [documentation](https://huggingface.co/docs/peft). Args: adapter_config (`[~peft.PeftConfig]`): From 400c2da17c9dde8a616e55007739e43ddcc79157 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 10 Oct 2023 17:24:12 +0000 Subject: [PATCH 105/134] trigger slow lora tests --- .github/workflows/push_tests.yml | 245 +------------------------------ 1 file changed, 4 insertions(+), 241 deletions(-) diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index a15a5412c4e4..9c5fc4e062d0 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -3,7 +3,7 @@ name: Slow Tests on main on: push: branches: - - main + - test-peft-unet env: @@ -53,57 +53,6 @@ jobs: name: test-pipelines.json path: reports - torch_pipelines_cuda_tests: - name: Torch Pipelines CUDA Slow Tests - needs: setup_torch_cuda_pipeline_matrix - strategy: - fail-fast: false - max-parallel: 1 - matrix: - module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }} - runs-on: docker-gpu - container: - image: diffusers/diffusers-pytorch-cuda - options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 - steps: - - name: Checkout diffusers - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - name: NVIDIA-SMI - run: | - nvidia-smi - - name: Install dependencies - run: | - apt-get update && apt-get install libsndfile1-dev libgl1 -y - python -m pip install -e .[quality,test] - python -m pip install git+https://github.com/huggingface/accelerate.git - - name: Environment - run: | - python utils/print_env.py - - name: Slow PyTorch CUDA checkpoint tests on Ubuntu - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms - CUBLAS_WORKSPACE_CONFIG: :16:8 - run: | - python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ - -s -v -k "not Flax and not Onnx" \ - --make-reports=tests_pipeline_${{ matrix.module }}_cuda \ - tests/pipelines/${{ matrix.module }} - - name: Failure short reports - if: ${{ failure() }} - run: | - cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt - cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt - - - name: Test suite reports artifacts - if: ${{ always() }} - uses: 
actions/upload-artifact@v2 - with: - name: pipeline_${{ matrix.module }}_test_reports - path: reports - torch_cuda_tests: name: Torch CUDA Tests runs-on: docker-gpu @@ -115,7 +64,7 @@ jobs: shell: bash strategy: matrix: - module: [models, schedulers, lora, others] + module: [lora] steps: - name: Checkout diffusers uses: actions/checkout@v3 @@ -127,6 +76,7 @@ jobs: apt-get update && apt-get install libsndfile1-dev libgl1 -y python -m pip install -e .[quality,test] python -m pip install git+https://github.com/huggingface/accelerate.git + python -m pip install -U git+https://github.com/huggingface/peft.git - name: Environment run: | @@ -147,191 +97,4 @@ jobs: if: ${{ failure() }} run: | cat reports/tests_torch_cuda_stats.txt - cat reports/tests_torch_cuda_failures_short.txt - - - name: Test suite reports artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: torch_cuda_test_reports - path: reports - - flax_tpu_tests: - name: Flax TPU Tests - runs-on: docker-tpu - container: - image: diffusers/diffusers-flax-tpu - options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --privileged - defaults: - run: - shell: bash - steps: - - name: Checkout diffusers - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - - name: Install dependencies - run: | - apt-get update && apt-get install libsndfile1-dev libgl1 -y - python -m pip install -e .[quality,test] - python -m pip install git+https://github.com/huggingface/accelerate.git - - - name: Environment - run: | - python utils/print_env.py - - - name: Run slow Flax TPU tests - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - run: | - python -m pytest -n 0 \ - -s -v -k "Flax" \ - --make-reports=tests_flax_tpu \ - tests/ - - - name: Failure short reports - if: ${{ failure() }} - run: | - cat reports/tests_flax_tpu_stats.txt - cat reports/tests_flax_tpu_failures_short.txt - - - name: Test suite reports artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: flax_tpu_test_reports - path: reports - - onnx_cuda_tests: - name: ONNX CUDA Tests - runs-on: docker-gpu - container: - image: diffusers/diffusers-onnxruntime-cuda - options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 - defaults: - run: - shell: bash - steps: - - name: Checkout diffusers - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - - name: Install dependencies - run: | - apt-get update && apt-get install libsndfile1-dev libgl1 -y - python -m pip install -e .[quality,test] - python -m pip install git+https://github.com/huggingface/accelerate.git - - - name: Environment - run: | - python utils/print_env.py - - - name: Run slow ONNXRuntime CUDA tests - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - run: | - python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ - -s -v -k "Onnx" \ - --make-reports=tests_onnx_cuda \ - tests/ - - - name: Failure short reports - if: ${{ failure() }} - run: | - cat reports/tests_onnx_cuda_stats.txt - cat reports/tests_onnx_cuda_failures_short.txt - - - name: Test suite reports artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: onnx_cuda_test_reports - path: reports - - run_torch_compile_tests: - name: PyTorch Compile CUDA tests - - runs-on: docker-gpu - - container: - image: diffusers/diffusers-pytorch-compile-cuda - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ - - steps: - - name: Checkout diffusers - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - - 
name: NVIDIA-SMI - run: | - nvidia-smi - - name: Install dependencies - run: | - python -m pip install -e .[quality,test,training] - - name: Environment - run: | - python utils/print_env.py - - name: Run example tests on GPU - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - run: | - python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/ - - name: Failure short reports - if: ${{ failure() }} - run: cat reports/tests_torch_compile_cuda_failures_short.txt - - - name: Test suite reports artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: torch_compile_test_reports - path: reports - - run_examples_tests: - name: Examples PyTorch CUDA tests on Ubuntu - - runs-on: docker-gpu - - container: - image: diffusers/diffusers-pytorch-cuda - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ - - steps: - - name: Checkout diffusers - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Install dependencies - run: | - python -m pip install -e .[quality,test,training] - - - name: Environment - run: | - python utils/print_env.py - - - name: Run example tests on GPU - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - run: | - python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_cuda examples/ - - - name: Failure short reports - if: ${{ failure() }} - run: | - cat reports/examples_torch_cuda_stats.txt - cat reports/examples_torch_cuda_failures_short.txt - - - name: Test suite reports artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: examples_test_reports - path: reports \ No newline at end of file + cat reports/tests_torch_cuda_failures_short.txt \ No newline at end of file From e6d80424a2c90fe7a6c88ed45ffdb6bb44065867 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 10 Oct 2023 17:52:57 +0000 Subject: [PATCH 106/134] fix tests --- src/diffusers/loaders.py | 2 ++ src/diffusers/models/attention_processor.py | 10 ++++++---- tests/lora/test_lora_layers_old_backend.py | 3 +++ tests/lora/test_lora_layers_peft.py | 8 ++++---- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 9f136d88d0a5..35edc320a809 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -2255,6 +2255,8 @@ def unload_lora_weights(self): module.set_lora_layer(None) else: recurse_remove_peft_layers(self.unet) + if hasattr(self.unet, "peft_config"): + del self.unet.peft_config # Safe to call the following regardless of LoRA. 
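With this fix, a full unload under the PEFT backend also drops the UNet's `peft_config`, so a later `load_lora_weights` call starts from a clean slate. A sketch of the behavior being guaranteed, reusing the SDXL offset LoRA from the tests above and assuming the PEFT backend is active:

```python
# After unload_lora_weights(), no PEFT layers or stale config remain on the UNet.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
pipe.load_lora_weights("hf-internal-testing/sdxl-1.0-lora", weight_name="sd_xl_offset_example-lora_1.0.safetensors")

pipe.unload_lora_weights()
assert not hasattr(pipe.unet, "peft_config")  # removed by the hunk above
```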
self._remove_text_encoder_monkey_patch() diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py index 5bb8e2fd9820..c5fe1509238d 100644 --- a/src/diffusers/models/attention_processor.py +++ b/src/diffusers/models/attention_processor.py @@ -550,6 +550,8 @@ def __call__( ): residual = hidden_states + args = () if USE_PEFT_BACKEND else (scale,) + if attn.spatial_norm is not None: hidden_states = attn.spatial_norm(hidden_states, temb) @@ -567,15 +569,15 @@ def __call__( if attn.group_norm is not None: hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) - query = attn.to_q(hidden_states, scale=scale) + query = attn.to_q(hidden_states, *args) if encoder_hidden_states is None: encoder_hidden_states = hidden_states elif attn.norm_cross: encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) - key = attn.to_k(encoder_hidden_states, scale=scale) - value = attn.to_v(encoder_hidden_states, scale=scale) + key = attn.to_k(encoder_hidden_states, *args) + value = attn.to_v(encoder_hidden_states, *args) query = attn.head_to_batch_dim(query) key = attn.head_to_batch_dim(key) @@ -586,7 +588,7 @@ def __call__( hidden_states = attn.batch_to_head_dim(hidden_states) # linear proj - hidden_states = attn.to_out[0](hidden_states, scale=scale) + hidden_states = attn.to_out[0](hidden_states, *args) # dropout hidden_states = attn.to_out[1](hidden_states) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index faefb98564a9..bb085ecb505f 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -662,6 +662,7 @@ def test_lora_save_load_with_xformers(self): self.assertFalse(torch.allclose(torch.from_numpy(orig_image_slice), torch.from_numpy(lora_image_slice))) +@deprecate_after_peft_backend class SDXInpaintLoraMixinTests(unittest.TestCase): def get_dummy_inputs(self, device, seed=0, img_res=64, output_pil=True): # TODO: use tensor inputs instead of PIL, this is here just to leave the old expected_slices untouched @@ -1376,6 +1377,7 @@ def test_save_load_fused_lora_modules(self): ), "The pipeline was serialized with LoRA parameters fused inside of the respected modules. The loaded pipeline should yield proper outputs, henceforth." 
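The `args = () if USE_PEFT_BACKEND else (scale,)` pattern above is the crux of the attention-processor change: PEFT-managed projections are plain `nn.Linear` modules and must not receive the legacy `scale` argument, while the old `LoRACompatibleLinear` layers still expect it positionally. A condensed sketch of that dispatch, with the layer types simplified:

```python
# Condensed sketch of the dispatch introduced above; layer classes simplified.
import torch
import torch.nn as nn

USE_PEFT_BACKEND = True  # resolved at import time from the installed peft/transformers versions

def project(layer: nn.Module, hidden_states: torch.Tensor, scale: float = 1.0):
    # With the PEFT backend, LoRA scaling is handled inside the PEFT layer,
    # so no extra argument is passed; legacy layers take the scale positionally.
    args = () if USE_PEFT_BACKEND else (scale,)
    return layer(hidden_states, *args)
```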
+@deprecate_after_peft_backend class UNet2DConditionLoRAModelTests(unittest.TestCase): model_class = UNet2DConditionModel main_input_name = "sample" @@ -1624,6 +1626,7 @@ def test_lora_xformers_on_off(self, expected_max_diff=6e-4): assert max_diff_off_sample < expected_max_diff +@deprecate_after_peft_backend class UNet3DConditionModelTests(unittest.TestCase): model_class = UNet3DConditionModel main_input_name = "sample" diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index cccd44ebf406..3d825f3a54f7 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -1569,9 +1569,6 @@ def test_sdxl_1_0_lora_unfusion_effectivity(self): lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16) pipe.fuse_lora() - # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being - # silently deleted - otherwise this will CPU OOM - pipe.unload_lora_weights() generator = torch.Generator().manual_seed(0) _ = pipe( @@ -1579,6 +1576,10 @@ def test_sdxl_1_0_lora_unfusion_effectivity(self): ).images pipe.unfuse_lora() + + # We need to unload the lora weights - in the old API unfuse led to unloading the adapter weights + pipe.unload_lora_weights() + generator = torch.Generator().manual_seed(0) images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 @@ -1588,7 +1589,6 @@ def test_sdxl_1_0_lora_unfusion_effectivity(self): self.assertTrue(np.allclose(original_image_slice, images_without_fusion_slice, atol=1e-3)) release_memory(pipe) - @unittest.skip("This lead to CPU OOM") def test_sdxl_1_0_lora_fusion_efficiency(self): generator = torch.Generator().manual_seed(0) lora_model_id = "hf-internal-testing/sdxl-1.0-lora" From d5e76476237cd6b698516f512c1ca8724aa4f750 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 10 Oct 2023 18:58:55 +0000 Subject: [PATCH 107/134] maybe fix last test --- tests/lora/test_lora_layers_peft.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 3d825f3a54f7..0abd5f8fa1e1 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -1554,9 +1554,7 @@ def test_sdxl_1_0_lora_unfusion(self): release_memory(pipe) def test_sdxl_1_0_lora_unfusion_effectivity(self): - pipe = DiffusionPipeline.from_pretrained( - "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16 - ) + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") pipe.enable_model_cpu_offload() generator = torch.Generator().manual_seed(0) @@ -1567,7 +1565,7 @@ def test_sdxl_1_0_lora_unfusion_effectivity(self): lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16) + pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) pipe.fuse_lora() generator = torch.Generator().manual_seed(0) From a02c1626e61fcdf299486a48e67f1e28f7391c08 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 11 Oct 2023 16:20:02 +0000 Subject: [PATCH 108/134] revert --- .github/workflows/push_tests.yml | 245 ++++++++++++++++++++++++++++++- 1 file changed, 241 insertions(+), 4 deletions(-) diff --git a/.github/workflows/push_tests.yml 
b/.github/workflows/push_tests.yml index 9c5fc4e062d0..a15a5412c4e4 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -3,7 +3,7 @@ name: Slow Tests on main on: push: branches: - - test-peft-unet + - main env: @@ -53,6 +53,57 @@ jobs: name: test-pipelines.json path: reports + torch_pipelines_cuda_tests: + name: Torch Pipelines CUDA Slow Tests + needs: setup_torch_cuda_pipeline_matrix + strategy: + fail-fast: false + max-parallel: 1 + matrix: + module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }} + runs-on: docker-gpu + container: + image: diffusers/diffusers-pytorch-cuda + options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + - name: NVIDIA-SMI + run: | + nvidia-smi + - name: Install dependencies + run: | + apt-get update && apt-get install libsndfile1-dev libgl1 -y + python -m pip install -e .[quality,test] + python -m pip install git+https://github.com/huggingface/accelerate.git + - name: Environment + run: | + python utils/print_env.py + - name: Slow PyTorch CUDA checkpoint tests on Ubuntu + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms + CUBLAS_WORKSPACE_CONFIG: :16:8 + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + -s -v -k "not Flax and not Onnx" \ + --make-reports=tests_pipeline_${{ matrix.module }}_cuda \ + tests/pipelines/${{ matrix.module }} + - name: Failure short reports + if: ${{ failure() }} + run: | + cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt + cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: pipeline_${{ matrix.module }}_test_reports + path: reports + torch_cuda_tests: name: Torch CUDA Tests runs-on: docker-gpu @@ -64,7 +115,7 @@ jobs: shell: bash strategy: matrix: - module: [lora] + module: [models, schedulers, lora, others] steps: - name: Checkout diffusers uses: actions/checkout@v3 @@ -76,7 +127,6 @@ jobs: apt-get update && apt-get install libsndfile1-dev libgl1 -y python -m pip install -e .[quality,test] python -m pip install git+https://github.com/huggingface/accelerate.git - python -m pip install -U git+https://github.com/huggingface/peft.git - name: Environment run: | @@ -97,4 +147,191 @@ jobs: if: ${{ failure() }} run: | cat reports/tests_torch_cuda_stats.txt - cat reports/tests_torch_cuda_failures_short.txt \ No newline at end of file + cat reports/tests_torch_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: torch_cuda_test_reports + path: reports + + flax_tpu_tests: + name: Flax TPU Tests + runs-on: docker-tpu + container: + image: diffusers/diffusers-flax-tpu + options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --privileged + defaults: + run: + shell: bash + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: Install dependencies + run: | + apt-get update && apt-get install libsndfile1-dev libgl1 -y + python -m pip install -e .[quality,test] + python -m pip install git+https://github.com/huggingface/accelerate.git + + - name: Environment + run: | + python utils/print_env.py + + - name: Run slow Flax TPU tests + env: + 
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 0 \ + -s -v -k "Flax" \ + --make-reports=tests_flax_tpu \ + tests/ + + - name: Failure short reports + if: ${{ failure() }} + run: | + cat reports/tests_flax_tpu_stats.txt + cat reports/tests_flax_tpu_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: flax_tpu_test_reports + path: reports + + onnx_cuda_tests: + name: ONNX CUDA Tests + runs-on: docker-gpu + container: + image: diffusers/diffusers-onnxruntime-cuda + options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 + defaults: + run: + shell: bash + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: Install dependencies + run: | + apt-get update && apt-get install libsndfile1-dev libgl1 -y + python -m pip install -e .[quality,test] + python -m pip install git+https://github.com/huggingface/accelerate.git + + - name: Environment + run: | + python utils/print_env.py + + - name: Run slow ONNXRuntime CUDA tests + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + -s -v -k "Onnx" \ + --make-reports=tests_onnx_cuda \ + tests/ + + - name: Failure short reports + if: ${{ failure() }} + run: | + cat reports/tests_onnx_cuda_stats.txt + cat reports/tests_onnx_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: onnx_cuda_test_reports + path: reports + + run_torch_compile_tests: + name: PyTorch Compile CUDA tests + + runs-on: docker-gpu + + container: + image: diffusers/diffusers-pytorch-compile-cuda + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ + + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: NVIDIA-SMI + run: | + nvidia-smi + - name: Install dependencies + run: | + python -m pip install -e .[quality,test,training] + - name: Environment + run: | + python utils/print_env.py + - name: Run example tests on GPU + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/ + - name: Failure short reports + if: ${{ failure() }} + run: cat reports/tests_torch_compile_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: torch_compile_test_reports + path: reports + + run_examples_tests: + name: Examples PyTorch CUDA tests on Ubuntu + + runs-on: docker-gpu + + container: + image: diffusers/diffusers-pytorch-cuda + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ + + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: NVIDIA-SMI + run: | + nvidia-smi + + - name: Install dependencies + run: | + python -m pip install -e .[quality,test,training] + + - name: Environment + run: | + python utils/print_env.py + + - name: Run example tests on GPU + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_cuda examples/ + + - name: Failure short reports + if: ${{ failure() }} + run: | + cat reports/examples_torch_cuda_stats.txt + cat 
reports/examples_torch_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: examples_test_reports + path: reports \ No newline at end of file From ffaf30f586c0a70db2776075bb2545c49d5c80e1 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:21:07 +0200 Subject: [PATCH 109/134] Update src/diffusers/loaders.py Co-authored-by: Benjamin Bossan --- src/diffusers/loaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 35edc320a809..f3479b3a6a36 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -710,7 +710,7 @@ def _unfuse_lora_apply(self, module): def set_adapters( self, adapter_names: Union[List[str], str], - weights: List[float] = None, + weights: Optional[List[float]] = None, ): """ Sets the adapter layers for the unet. From 836e32e480b0e3c1b647a593597fdef394aa655b Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:23:58 +0200 Subject: [PATCH 110/134] Update src/diffusers/loaders.py Co-authored-by: Benjamin Bossan --- src/diffusers/loaders.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index f3479b3a6a36..b2e182e3c22b 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1616,9 +1616,9 @@ def load_lora_into_unet( network_alphas = convert_unet_state_dict_to_peft(network_alphas) rank = {} - for key in state_dict.keys(): + for key, val in state_dict.items(): if "lora_B" in key: - rank[key] = state_dict[key].shape[1] + rank[key] = val.shape[1] lora_config_kwargs = get_peft_kwargs(rank, network_alphas, state_dict, is_unet=True) lora_config = LoraConfig(**lora_config_kwargs) From 2fa61fc3d53e8bbbf91ccd86c0619aca80a14663 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:24:16 +0200 Subject: [PATCH 111/134] Update src/diffusers/loaders.py Co-authored-by: Benjamin Bossan --- src/diffusers/loaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index b2e182e3c22b..bd9996cd3ac3 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -1627,7 +1627,7 @@ def load_lora_into_unet( if adapter_name is None: adapter_name = get_adapter_name(unet) - # In case the pipeline has been already offloaded to CPU - temporarly remove the hooks + # In case the pipeline has been already offloaded to CPU - temporarily remove the hooks # otherwise loading LoRA weights will lead to an error is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline) From 9102399cca015eb5a5a58f72198cb5e304435aa1 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:24:39 +0200 Subject: [PATCH 112/134] Update src/diffusers/loaders.py Co-authored-by: Benjamin Bossan --- src/diffusers/loaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index bd9996cd3ac3..e2e8dbecbe04 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -2470,7 +2470,7 @@ def enable_lora_for_text_encoder(self, text_encoder: Optional[PreTrainedModel] = def set_adapters( self, adapter_names: Union[List[str], str], 
-        adapter_weights: List[float] = None,
+        adapter_weights: Optional[List[float]] = None,
     ):
         # Handle the UNET
         self.unet.set_adapters(adapter_names, adapter_weights)

From 924222e0eb276d2e4b48458df912cb6e0a4a42d3 Mon Sep 17 00:00:00 2001
From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
Date: Thu, 12 Oct 2023 16:33:13 +0200
Subject: [PATCH 113/134] Apply suggestions from code review

Co-authored-by: Patrick von Platen
Co-authored-by: Sayak Paul
---
 src/diffusers/loaders.py | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index e2e8dbecbe04..eafece74d483 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -402,7 +402,7 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict
         # fill attn processors
         lora_layers_list = []

-        is_lora = all(("lora" in k or k.endswith(".alpha")) for k in state_dict.keys()) and not self.use_peft_backend
+        is_lora = all(("lora" in k or k.endswith(".alpha")) for k in state_dict.keys()) and not USE_PEFT_BACKEND
         is_custom_diffusion = any("custom_diffusion" in k for k in state_dict.keys())

         if is_lora:
@@ -516,7 +516,7 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict
                     cross_attention_dim=cross_attention_dim,
                 )
                 attn_processors[key].load_state_dict(value_dict)
-            elif self.use_peft_backend:
+            elif USE_PEFT_BACKEND:
                 # In that case we have nothing to do as loading the adapter weights is already handled above by `set_peft_model_state_dict`
                 # on the Unet
                 pass
@@ -532,7 +532,7 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict
         is_sequential_cpu_offload = False

         # For PEFT backend the Unet is already offloaded at this stage as it is handled inside `lora_lora_weights_into_unet`
-        if not self.use_peft_backend:
+        if not USE_PEFT_BACKEND:
             if _pipeline is not None:
                 for _, component in _pipeline.components.items():
                     if isinstance(component, nn.Module):
@@ -722,7 +722,7 @@ def set_adapters(
                 The adapter(s) weights to use with the UNet. If `None`, the weights are set to `1.0` for all the
                 adapters.
         """
-        if not self.use_peft_backend:
+        if not USE_PEFT_BACKEND:
             raise ValueError("PEFT backend is required for `set_adapters()`.")

         def process_weights(adapter_names, weights):
@@ -1862,7 +1862,7 @@ def _remove_text_encoder_monkey_patch(self):
         remove_method(self.text_encoder)

         # In case text encoder have no Lora attached
-        if self.use_peft_backend and getattr(self.text_encoder, "peft_config", None) is not None:
+        if USE_PEFT_BACKEND and getattr(self.text_encoder, "peft_config", None) is not None:
             del self.text_encoder.peft_config
             self.text_encoder._hf_peft_config_loaded = None
         if hasattr(self, "text_encoder_2"):
@@ -2243,9 +2243,9 @@ def unload_lora_weights(self):
        >>> ...
        ```
        """
-        if not self.use_peft_backend:
+        if not USE_PEFT_BACKEND:
             if version.parse(__version__) > version.parse("0.23"):
-                warnings.warn(
+                logger.warn(
                    "You are using `unload_lora_weights` to disable and unload lora weights. If you want to iteratively enable and disable adapter weights, you can use `pipe.enable_lora()` or `pipe.disable_lora()` after"
                    " installing the latest version of PEFT."
                )
@@ -2350,7 +2350,7 @@ def unfuse_lora(self, unfuse_unet: bool = True, unfuse_text_encoder: bool = True
             LoRA parameters then it won't have any effect.
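With patches 099 and 112 combined, `set_adapters` now takes a single optional `adapter_weights` list that is applied to the UNet and both text encoders. A usage sketch mirroring the multi-adapter integration test above; the checkpoints and prompt are illustrative, while the adapter names and weight list match that test:

```python
# Weight two adapters at once across the UNet and both text encoders.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")

pipe.set_adapters(["pixel", "toy"], adapter_weights=[0.5, 1.0])  # one list covers all components
image = pipe("toy face of a pixel-art corgi", num_inference_steps=30).images[0]
```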
""" if unfuse_unet: - if not self.use_peft_backend: + if not USE_PEFT_BACKEND: self.unet.unfuse_lora() else: from peft.tuners.tuners_utils import BaseTunerLayer @@ -2482,7 +2482,7 @@ def set_adapters( self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2, adapter_weights) def disable_lora(self): - if not self.use_peft_backend: + if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") # Disable unet adapters @@ -2495,7 +2495,7 @@ def disable_lora(self): self.disable_lora_for_text_encoder(self.text_encoder_2) def enable_lora(self): - if not self.use_peft_backend: + if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") # Enable unet adapters @@ -2509,9 +2509,19 @@ def enable_lora(self): def get_active_adapters(self) -> Optional[List[str]]: """ - Gets the list of the current active adapters + Gets the list of the current active adapters. + + Example: + + ```python + from diffusers import DiffusionPipeline + + pipeline = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0",).to("cuda") + pipeline.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy") + pipeline.get_active_adapters() + ``` """ - if not self.use_peft_backend: + if not USE_PEFT_BACKEND: raise ValueError( "PEFT backend is required for this method. Please install the latest version of PEFT `pip install -U peft`" ) @@ -2531,7 +2541,7 @@ def get_list_adapters(self) -> Dict[str, List[str]]: """ Gets the current list of all available adapters in the pipeline. """ - if not self.use_peft_backend: + if not USE_PEFT_BACKEND: raise ValueError( "PEFT backend is required for this method. Please install the latest version of PEFT `pip install -U peft`" ) @@ -2562,7 +2572,7 @@ def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, device (`Union[torch.device, str, int]`): Device to send the adapters to. Can be either a torch device, a str or an integer. """ - if not self.use_peft_backend: + if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") from peft.tuners.tuners_utils import BaseTunerLayer From 21a279aaa6fbf6d99f843f98babfb2d24d38c223 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 12 Oct 2023 14:37:02 +0000 Subject: [PATCH 114/134] move `MIN_PEFT_VERSION` --- src/diffusers/loaders.py | 11 ++++++----- src/diffusers/utils/__init__.py | 2 +- src/diffusers/utils/constants.py | 4 +++- src/diffusers/utils/peft_utils.py | 3 --- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index eafece74d483..c61b80e3574a 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -13,7 +13,6 @@ # limitations under the License. import os import re -import warnings from collections import defaultdict from contextlib import nullcontext from io import BytesIO @@ -2510,13 +2509,15 @@ def enable_lora(self): def get_active_adapters(self) -> Optional[List[str]]: """ Gets the list of the current active adapters. 
- + Example: - + ```python from diffusers import DiffusionPipeline - - pipeline = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0",).to("cuda") + + pipeline = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + ).to("cuda") pipeline.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy") pipeline.get_active_adapters() ``` diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 54be153a311b..b4d6bdab33eb 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -26,6 +26,7 @@ FLAX_WEIGHTS_NAME, HF_MODULES_CACHE, HUGGINGFACE_CO_RESOLVE_ENDPOINT, + MIN_PEFT_VERSION, ONNX_EXTERNAL_WEIGHTS_NAME, ONNX_WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME, @@ -87,7 +88,6 @@ from .logging import get_logger from .outputs import BaseOutput from .peft_utils import ( - MIN_PEFT_VERSION, check_peft_version, get_adapter_name, get_peft_kwargs, diff --git a/src/diffusers/utils/constants.py b/src/diffusers/utils/constants.py index 05ea4067b414..3023cb476fe0 100644 --- a/src/diffusers/utils/constants.py +++ b/src/diffusers/utils/constants.py @@ -22,6 +22,8 @@ default_cache_path = HUGGINGFACE_HUB_CACHE +MIN_PEFT_VERSION = "0.5.0" + CONFIG_NAME = "config.json" WEIGHTS_NAME = "diffusion_pytorch_model.bin" @@ -41,7 +43,7 @@ # For PEFT it is has to be greater than 0.6.0 and for transformers it has to be greater than 4.33.1. _required_peft_version = is_peft_available() and version.parse( version.parse(importlib.metadata.version("peft")).base_version -) > version.parse("0.5") +) > version.parse(MIN_PEFT_VERSION) _required_transformers_version = is_transformers_available() and version.parse( version.parse(importlib.metadata.version("transformers")).base_version ) > version.parse("4.33") diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index 3d3cdcc19fc0..efc977518b14 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -22,9 +22,6 @@ from .import_utils import is_peft_available, is_torch_available -MIN_PEFT_VERSION = "0.5.0" - - def recurse_remove_peft_layers(model): if is_torch_available(): import torch From 0fe4203827bec39ed71808b298f15ed25d80d99b Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Thu, 12 Oct 2023 16:39:57 +0200 Subject: [PATCH 115/134] Apply suggestions from code review Co-authored-by: Patrick von Platen --- src/diffusers/models/unet_2d_condition.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py index b8f78f260977..9f6f3e666029 100644 --- a/src/diffusers/models/unet_2d_condition.py +++ b/src/diffusers/models/unet_2d_condition.py @@ -995,7 +995,7 @@ def forward( # 3. 
down lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0 - if self.use_peft_backend: + if USE_PEFT_BACKEND: # weight the lora layers by setting `lora_scale` for each PEFT layer scale_lora_layers(self, lora_scale) @@ -1097,7 +1097,7 @@ def forward( sample = self.conv_act(sample) sample = self.conv_out(sample) - if self.use_peft_backend: + if USE_PEFT_BACKEND: # remove `lora_scale` from each PEFT layer unscale_lora_layers(self) From e981af258a0159313842bd0c130cc7866c8522f6 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 12 Oct 2023 14:52:12 +0000 Subject: [PATCH 116/134] let's not use class variable --- src/diffusers/loaders.py | 32 +++++++++---------- src/diffusers/models/unet_2d_condition.py | 2 +- .../alt_diffusion/pipeline_alt_diffusion.py | 13 ++++++-- .../pipeline_alt_diffusion_img2img.py | 5 +-- .../controlnet/pipeline_controlnet.py | 13 ++++++-- .../controlnet/pipeline_controlnet_img2img.py | 5 +-- .../controlnet/pipeline_controlnet_inpaint.py | 13 ++++++-- .../pipeline_controlnet_inpaint_sd_xl.py | 5 +-- .../controlnet/pipeline_controlnet_sd_xl.py | 6 ++-- .../pipeline_controlnet_sd_xl_img2img.py | 5 +-- .../pipeline_cycle_diffusion.py | 6 ++-- .../pipeline_stable_diffusion.py | 13 ++++++-- ...line_stable_diffusion_attend_and_excite.py | 13 ++++++-- .../pipeline_stable_diffusion_depth2img.py | 6 ++-- .../pipeline_stable_diffusion_diffedit.py | 5 +-- .../pipeline_stable_diffusion_gligen.py | 13 ++++++-- ...line_stable_diffusion_gligen_text_image.py | 6 ++-- .../pipeline_stable_diffusion_img2img.py | 5 +-- .../pipeline_stable_diffusion_inpaint.py | 6 ++-- ...ipeline_stable_diffusion_inpaint_legacy.py | 6 ++-- .../pipeline_stable_diffusion_k_diffusion.py | 6 ++-- .../pipeline_stable_diffusion_ldm3d.py | 14 ++++++-- ...pipeline_stable_diffusion_model_editing.py | 6 ++-- .../pipeline_stable_diffusion_panorama.py | 13 ++++++-- .../pipeline_stable_diffusion_paradigms.py | 5 +-- .../pipeline_stable_diffusion_pix2pix_zero.py | 5 +-- .../pipeline_stable_diffusion_sag.py | 13 ++++++-- .../pipeline_stable_diffusion_upscale.py | 6 ++-- .../pipeline_stable_unclip.py | 13 ++++++-- .../pipeline_stable_unclip_img2img.py | 13 ++++++-- .../pipeline_stable_diffusion_xl.py | 5 +-- .../pipeline_stable_diffusion_xl_img2img.py | 5 +-- .../pipeline_stable_diffusion_xl_inpaint.py | 5 +-- .../pipeline_stable_diffusion_adapter.py | 5 +-- .../pipeline_stable_diffusion_xl_adapter.py | 13 ++++++-- .../pipeline_text_to_video_synth.py | 13 ++++++-- .../pipeline_text_to_video_synth_img2img.py | 13 ++++++-- .../unidiffuser/pipeline_unidiffuser.py | 6 ++-- .../versatile_diffusion/modeling_text_unet.py | 6 ++-- 39 files changed, 226 insertions(+), 117 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index c61b80e3574a..a33308fa0c93 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -256,7 +256,6 @@ def map_from(module, state_dict, *args, **kwargs): class UNet2DConditionLoadersMixin: text_encoder_name = TEXT_ENCODER_NAME unet_name = UNET_NAME - use_peft_backend = USE_PEFT_BACKEND def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs): r""" @@ -682,7 +681,7 @@ def fuse_lora(self, lora_scale=1.0, safe_fusing=False): self.apply(self._fuse_lora_apply) def _fuse_lora_apply(self, module): - if not self.use_peft_backend: + if not USE_PEFT_BACKEND: if hasattr(module, "_fuse_lora"): module._fuse_lora(self.lora_scale, self._safe_fusing) else: @@ -697,7 +696,7 @@ def 
unfuse_lora(self): self.apply(self._unfuse_lora_apply) def _unfuse_lora_apply(self, module): - if not self.use_peft_backend: + if not USE_PEFT_BACKEND: if hasattr(module, "_unfuse_lora"): module._unfuse_lora() else: @@ -744,7 +743,7 @@ def disable_lora(self): """ Disables the LoRA layers for the unet. """ - if not self.use_peft_backend: + if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") set_adapter_layers(self, enabled=False) @@ -752,7 +751,7 @@ def enable_lora(self): """ Enables the LoRA layers for the unet. """ - if not self.use_peft_backend: + if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for this method.") set_adapter_layers(self, enabled=True) @@ -1173,7 +1172,6 @@ class LoraLoaderMixin: text_encoder_name = TEXT_ENCODER_NAME unet_name = UNET_NAME num_fused_loras = 0 - use_peft_backend = USE_PEFT_BACKEND def load_lora_weights( self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], adapter_name=None, **kwargs @@ -1604,7 +1602,7 @@ def load_lora_into_unet( warn_message = "You have saved the LoRA weights using the old format. To convert the old LoRA weights to the new format, you can first load them in a dictionary and then create a new dictionary like the following: `new_state_dict = {f'unet.{module_name}': params for module_name, params in old_state_dict.items()}`." logger.warn(warn_message) - if cls.use_peft_backend and len(state_dict.keys()) > 0: + if USE_PEFT_BACKEND and len(state_dict.keys()) > 0: from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict state_dict = convert_unet_state_dict_to_peft(state_dict) @@ -1719,7 +1717,7 @@ def load_lora_into_text_encoder( rank = {} text_encoder_lora_state_dict = convert_state_dict_to_diffusers(text_encoder_lora_state_dict) - if cls.use_peft_backend: + if USE_PEFT_BACKEND: # convert state dict text_encoder_lora_state_dict = convert_state_dict_to_peft(text_encoder_lora_state_dict) @@ -1756,7 +1754,7 @@ def load_lora_into_text_encoder( k.replace(f"{prefix}.", ""): v for k, v in network_alphas.items() if k in alpha_keys } - if cls.use_peft_backend: + if USE_PEFT_BACKEND: from peft import LoraConfig lora_config_kwargs = get_peft_kwargs( @@ -1852,7 +1850,7 @@ def lora_scale(self) -> float: return self._lora_scale if hasattr(self, "_lora_scale") else 1.0 def _remove_text_encoder_monkey_patch(self): - if self.use_peft_backend: + if USE_PEFT_BACKEND: remove_method = recurse_remove_peft_layers else: remove_method = self._remove_text_encoder_monkey_patch_classmethod @@ -1866,7 +1864,7 @@ def _remove_text_encoder_monkey_patch(self): self.text_encoder._hf_peft_config_loaded = None if hasattr(self, "text_encoder_2"): remove_method(self.text_encoder_2) - if self.use_peft_backend: + if USE_PEFT_BACKEND: del self.text_encoder_2.peft_config self.text_encoder_2._hf_peft_config_loaded = None @@ -2296,7 +2294,7 @@ def fuse_lora( if fuse_unet: self.unet.fuse_lora(lora_scale, safe_fusing=safe_fusing) - if self.use_peft_backend: + if USE_PEFT_BACKEND: from peft.tuners.tuners_utils import BaseTunerLayer def fuse_text_encoder_lora(text_encoder, lora_scale=1.0, safe_fusing=False): @@ -2358,7 +2356,7 @@ def unfuse_lora(self, unfuse_unet: bool = True, unfuse_text_encoder: bool = True if isinstance(module, BaseTunerLayer): module.unmerge() - if self.use_peft_backend: + if USE_PEFT_BACKEND: from peft.tuners.tuners_utils import BaseTunerLayer def unfuse_text_encoder_lora(text_encoder): @@ -2409,7 +2407,7 @@ def set_adapters_for_text_encoder( text_encoder_weights 
@@ -1173,7 +1172,6 @@ class LoraLoaderMixin:
     text_encoder_name = TEXT_ENCODER_NAME
     unet_name = UNET_NAME
     num_fused_loras = 0
-    use_peft_backend = USE_PEFT_BACKEND
 
     def load_lora_weights(
         self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], adapter_name=None, **kwargs
@@ -1604,7 +1602,7 @@ def load_lora_into_unet(
             warn_message = "You have saved the LoRA weights using the old format. To convert the old LoRA weights to the new format, you can first load them in a dictionary and then create a new dictionary like the following: `new_state_dict = {f'unet.{module_name}': params for module_name, params in old_state_dict.items()}`."
             logger.warn(warn_message)
 
-        if cls.use_peft_backend and len(state_dict.keys()) > 0:
+        if USE_PEFT_BACKEND and len(state_dict.keys()) > 0:
             from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict
 
             state_dict = convert_unet_state_dict_to_peft(state_dict)
@@ -1719,7 +1717,7 @@ def load_lora_into_text_encoder(
             rank = {}
             text_encoder_lora_state_dict = convert_state_dict_to_diffusers(text_encoder_lora_state_dict)
 
-            if cls.use_peft_backend:
+            if USE_PEFT_BACKEND:
                 # convert state dict
                 text_encoder_lora_state_dict = convert_state_dict_to_peft(text_encoder_lora_state_dict)
@@ -1756,7 +1754,7 @@ def load_lora_into_text_encoder(
                     k.replace(f"{prefix}.", ""): v for k, v in network_alphas.items() if k in alpha_keys
                 }
 
-            if cls.use_peft_backend:
+            if USE_PEFT_BACKEND:
                 from peft import LoraConfig
 
                 lora_config_kwargs = get_peft_kwargs(
@@ -1852,7 +1850,7 @@ def lora_scale(self) -> float:
         return self._lora_scale if hasattr(self, "_lora_scale") else 1.0
 
     def _remove_text_encoder_monkey_patch(self):
-        if self.use_peft_backend:
+        if USE_PEFT_BACKEND:
             remove_method = recurse_remove_peft_layers
         else:
             remove_method = self._remove_text_encoder_monkey_patch_classmethod
@@ -1866,7 +1864,7 @@ def _remove_text_encoder_monkey_patch(self):
                 self.text_encoder._hf_peft_config_loaded = None
 
         if hasattr(self, "text_encoder_2"):
             remove_method(self.text_encoder_2)
-            if self.use_peft_backend:
+            if USE_PEFT_BACKEND:
                 del self.text_encoder_2.peft_config
                 self.text_encoder_2._hf_peft_config_loaded = None
@@ -2296,7 +2294,7 @@ def fuse_lora(
         if fuse_unet:
             self.unet.fuse_lora(lora_scale, safe_fusing=safe_fusing)
 
-        if self.use_peft_backend:
+        if USE_PEFT_BACKEND:
             from peft.tuners.tuners_utils import BaseTunerLayer
 
             def fuse_text_encoder_lora(text_encoder, lora_scale=1.0, safe_fusing=False):
@@ -2358,7 +2356,7 @@ def unfuse_lora(self, unfuse_unet: bool = True, unfuse_text_encoder: bool = True
                     if isinstance(module, BaseTunerLayer):
                         module.unmerge()
 
-        if self.use_peft_backend:
+        if USE_PEFT_BACKEND:
             from peft.tuners.tuners_utils import BaseTunerLayer
 
             def unfuse_text_encoder_lora(text_encoder):
@@ -2409,7 +2407,7 @@ def set_adapters_for_text_encoder(
             text_encoder_weights (`List[float]`, *optional*):
                 The weights to use for the text encoder. If `None`, the weights are set to `1.0` for all the adapters.
         """
-        if not self.use_peft_backend:
+        if not USE_PEFT_BACKEND:
             raise ValueError("PEFT backend is required for this method.")
 
         def process_weights(adapter_names, weights):
@@ -2442,7 +2440,7 @@ def disable_lora_for_text_encoder(self, text_encoder: Optional[PreTrainedModel]
                 The text encoder module to disable the LoRA layers for. If `None`, it will try to get the `text_encoder`
                 attribute.
         """
-        if not self.use_peft_backend:
+        if not USE_PEFT_BACKEND:
             raise ValueError("PEFT backend is required for this method.")
 
         text_encoder = text_encoder or getattr(self, "text_encoder", None)
@@ -2459,7 +2457,7 @@ def enable_lora_for_text_encoder(self, text_encoder: Optional[PreTrainedModel] =
                 The text encoder module to enable the LoRA layers for. If `None`, it will try to get the `text_encoder`
                 attribute.
         """
-        if not self.use_peft_backend:
+        if not USE_PEFT_BACKEND:
             raise ValueError("PEFT backend is required for this method.")
         text_encoder = text_encoder or getattr(self, "text_encoder", None)
         if text_encoder is None:
@@ -3319,7 +3317,7 @@ def pack_weights(layers, prefix):
         )
 
     def _remove_text_encoder_monkey_patch(self):
-        if self.use_peft_backend:
+        if USE_PEFT_BACKEND:
             recurse_remove_peft_layers(self.text_encoder)
             # TODO: @younesbelkada handle this in transformers side
             if getattr(self.text_encoder, "peft_config", None) is not None:
diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py
index 9f6f3e666029..4039fbfcc67a 100644
--- a/src/diffusers/models/unet_2d_condition.py
+++ b/src/diffusers/models/unet_2d_condition.py
@@ -20,7 +20,7 @@
 from ..configuration_utils import ConfigMixin, register_to_config
 from ..loaders import UNet2DConditionLoadersMixin
-from ..utils import BaseOutput, logging, scale_lora_layers, unscale_lora_layers
+from ..utils import USE_PEFT_BACKEND, BaseOutput, logging, scale_lora_layers, unscale_lora_layers
 from .activations import get_activation
 from .attention_processor import (
     ADDED_KV_ATTENTION_PROCESSORS,
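
With the PEFT backend, fusing folds each LoRA delta into the base weights through `BaseTunerLayer.merge()`/`unmerge()`. A condensed sketch of the text-encoder path the `fuse_lora`/`unfuse_lora` hunks above gate (the `safe_merge` keyword is assumed from PEFT's layer API):

    from peft.tuners.tuners_utils import BaseTunerLayer

    def fuse_text_encoder_lora(text_encoder, lora_scale=1.0, safe_fusing=False):
        # Merge the LoRA weights into the frozen base weights of each PEFT layer.
        for module in text_encoder.modules():
            if isinstance(module, BaseTunerLayer):
                if lora_scale != 1.0:
                    module.scale_layer(lora_scale)
                module.merge(safe_merge=safe_fusing)

    def unfuse_text_encoder_lora(text_encoder):
        # Subtract the merged LoRA weights back out of the base weights.
        for module in text_encoder.modules():
            if isinstance(module, BaseTunerLayer):
                module.unmerge()
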
diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
index 874d2750e7a3..18518cc3783f 100644
--- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
+++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
@@ -25,7 +25,14 @@
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -304,7 +311,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -432,7 +439,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
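
In user code, the `lora_scale` that reaches `encode_prompt` (and the unet forward above) comes from `cross_attention_kwargs`. A small usage sketch (model id and LoRA path are placeholders):

    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")
    pipe.load_lora_weights("path/to/lora")  # placeholder path

    # `scale` is picked up as `lora_scale` for both the text encoder and the unet.
    image = pipe(
        "a pencil sketch of a lighthouse", cross_attention_kwargs={"scale": 0.7}
    ).images[0]
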
diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
index 47fa019647d4..404a4277e7cc 100644
--- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
+++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
@@ -29,6 +29,7 @@
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     PIL_INTERPOLATION,
+    USE_PEFT_BACKEND,
     deprecate,
     logging,
     replace_example_docstring,
@@ -309,7 +310,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -437,7 +438,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
index ad0060976440..87259378b8a2 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
@@ -27,7 +27,14 @@
 from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import is_compiled_module, randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
@@ -287,7 +294,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -415,7 +422,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
index ef34ad3ee70a..98075a3d7253 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -27,6 +27,7 @@
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
+    USE_PEFT_BACKEND,
     deprecate,
     logging,
     replace_example_docstring,
@@ -317,7 +318,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -445,7 +446,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
index 640ca0a22e9c..324aa1e0f81c 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
@@ -28,7 +28,14 @@
 from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import is_compiled_module, randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from ..stable_diffusion import StableDiffusionPipelineOutput
@@ -438,7 +445,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -566,7 +573,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
index 41b0d5434386..4a843a4ed883 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
@@ -33,6 +33,7 @@
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
+    USE_PEFT_BACKEND,
     is_invisible_watermark_available,
     logging,
     replace_example_docstring,
@@ -316,7 +317,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
                 adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale)
             else:
@@ -458,7 +459,7 @@ def encode_prompt(
                 bs_embed * num_images_per_prompt, -1
             )
 
-        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
             unscale_lora_layers(self.text_encoder_2)
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
index 7f230c2ec058..f555ea49b3ab 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
@@ -35,7 +35,7 @@
 )
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import USE_PEFT_BACKEND, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
 from ...utils.torch_utils import is_compiled_module, randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -285,7 +285,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
                 adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale)
             else:
@@ -427,7 +427,7 @@ def encode_prompt(
                 bs_embed * num_images_per_prompt, -1
             )
 
-        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
             unscale_lora_layers(self.text_encoder_2)
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
index aeffc219674d..849e34153285 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
@@ -36,6 +36,7 @@
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
+    USE_PEFT_BACKEND,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -328,7 +329,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
                 adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale)
             else:
@@ -470,7 +471,7 @@ def encode_prompt(
                 bs_embed * num_images_per_prompt, -1
             )
 
-        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
             unscale_lora_layers(self.text_encoder_2)
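
Every SDXL-flavored pipeline repeats the same bracket by hand: scale both text encoders before encoding, unscale them afterwards. The same contract can be written once as a context manager; a sketch under the same assumptions as above (`scaled_text_encoders` is a hypothetical helper, not part of the library):

    import contextlib

    @contextlib.contextmanager
    def scaled_text_encoders(pipe, lora_scale: float):
        # Scale both SDXL text encoders, then always restore them on exit.
        scale_lora_layers(pipe.text_encoder, lora_scale)
        scale_lora_layers(pipe.text_encoder_2, lora_scale)
        try:
            yield
        finally:
            unscale_lora_layers(pipe.text_encoder)
            unscale_lora_layers(pipe.text_encoder_2)
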
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py
index 6bcbbab135df..d45e35d5cba0 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py
@@ -27,7 +27,7 @@
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import DDIMScheduler
-from ...utils import PIL_INTERPOLATION, deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import PIL_INTERPOLATION, USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from .pipeline_output import StableDiffusionPipelineOutput
@@ -308,7 +308,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -436,7 +436,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index f36faa85c164..a9d28144e543 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -25,7 +25,14 @@
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from .pipeline_output import StableDiffusionPipelineOutput
@@ -297,7 +304,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -425,7 +432,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py
index e49e12b92ea3..153efae876cd 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py
@@ -27,7 +27,14 @@
 from ...models.attention_processor import Attention
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -332,7 +339,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -460,7 +467,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
index 95c3a79cf0c5..d73cf769e3ae 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
@@ -28,7 +28,7 @@
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import PIL_INTERPOLATION, deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import PIL_INTERPOLATION, USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
@@ -213,7 +213,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -341,7 +341,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py
index 7126b798feb5..451ef690a759 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py
@@ -30,6 +30,7 @@
 from ...schedulers import DDIMInverseScheduler, KarrasDiffusionSchedulers
 from ...utils import (
     PIL_INTERPOLATION,
+    USE_PEFT_BACKEND,
     BaseOutput,
     deprecate,
     logging,
@@ -483,7 +484,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -611,7 +612,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py
index f176f08d5d8c..ce7faaed2ab1 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py
@@ -26,7 +26,14 @@
 from ...models.attention import GatedSelfAttentionDense
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -274,7 +281,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -402,7 +409,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py
index ba418b4cb3c3..67f3fe0e9448 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py
@@ -32,7 +32,7 @@
 from ...models.attention import GatedSelfAttentionDense
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import USE_PEFT_BACKEND, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -306,7 +306,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -434,7 +434,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
index 8c180f5224b7..ffbd8246603e 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@@ -29,6 +29,7 @@
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     PIL_INTERPOLATION,
+    USE_PEFT_BACKEND,
     deprecate,
     logging,
     replace_example_docstring,
@@ -304,7 +305,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -432,7 +433,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
index e792eb8f8c12..e185ed588047 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -27,7 +27,7 @@
 from ...models import AsymmetricAutoencoderKL, AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -375,7 +375,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -503,7 +503,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py
index 4b555e0367c6..513c660c30cf 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py
@@ -27,7 +27,7 @@
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import PIL_INTERPOLATION, deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import PIL_INTERPOLATION, USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -297,7 +297,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -425,7 +425,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py
index a5c447792ff5..e0bb9b6e0b14 100755
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py
@@ -24,7 +24,7 @@
 from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import LMSDiscreteScheduler
-from ...utils import deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -211,7 +211,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -339,7 +339,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py
index eb3ba4b90a71..2e514a55108c 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py
@@ -26,7 +26,15 @@
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import BaseOutput, deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    BaseOutput,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from .safety_checker import StableDiffusionSafetyChecker
@@ -267,7 +275,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -395,7 +403,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py
index e67c04ebcf7c..6c78d190d97f 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py
@@ -24,7 +24,7 @@
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import PNDMScheduler
 from ...schedulers.scheduling_utils import SchedulerMixin
-from ...utils import deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -244,7 +244,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -372,7 +372,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py
index 1704e28f0c7f..bac1f83fb336 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py
@@ -23,7 +23,14 @@
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import DDIMScheduler
-from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -221,7 +228,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -349,7 +356,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py
index 256286904804..161f656fee2e 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py
@@ -24,6 +24,7 @@
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
+    USE_PEFT_BACKEND,
     deprecate,
     logging,
     replace_example_docstring,
@@ -258,7 +259,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -386,7 +387,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py
index 6cbea1d1da7e..6d4286a04686 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py
@@ -37,6 +37,7 @@
 from ...schedulers.scheduling_ddim_inverse import DDIMInverseScheduler
 from ...utils import (
     PIL_INTERPOLATION,
+    USE_PEFT_BACKEND,
     BaseOutput,
     deprecate,
     logging,
@@ -448,7 +449,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -576,7 +577,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py
index 42cc9905c49a..6a78d4da4545 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py
@@ -24,7 +24,14 @@
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -244,7 +251,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -372,7 +379,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
index 8d01e0a0d086..f3aa01ebeebb 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -32,7 +32,7 @@
 )
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -242,7 +242,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -370,7 +370,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
index 3b12058eda7b..3bce80fdb5b1 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
@@ -25,7 +25,14 @@
 from ...models.embeddings import get_timestep_embedding
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
@@ -342,7 +349,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -470,7 +477,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
index 3ef1994b0cb3..a17a674b7066 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
@@ -25,7 +25,14 @@
 from ...models.embeddings import get_timestep_embedding
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
@@ -296,7 +303,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -424,7 +431,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
index 193686a40419..cbfe4e0d3835 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
@@ -34,6 +34,7 @@
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
+    USE_PEFT_BACKEND,
     is_invisible_watermark_available,
     is_torch_xla_available,
     logging,
@@ -274,7 +275,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
                 adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale)
             else:
@@ -416,7 +417,7 @@ def encode_prompt(
                 bs_embed * num_images_per_prompt, -1
             )
 
-        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
             unscale_lora_layers(self.text_encoder_2)
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
index a5fb134f9913..75f814ca84cd 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
@@ -31,6 +31,7 @@
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
+    USE_PEFT_BACKEND,
     is_invisible_watermark_available,
     is_torch_xla_available,
     logging,
@@ -281,7 +282,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
                 adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale)
             else:
@@ -423,7 +424,7 @@ def encode_prompt(
                 bs_embed * num_images_per_prompt, -1
             )
 
-        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
             unscale_lora_layers(self.text_encoder_2)
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
index a6e0531eae3a..39ec59048f39 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
@@ -32,6 +32,7 @@
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
+    USE_PEFT_BACKEND,
     deprecate,
     is_invisible_watermark_available,
     is_torch_xla_available,
@@ -430,7 +431,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
                 adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale)
             else:
@@ -572,7 +573,7 @@ def encode_prompt(
                 bs_embed * num_images_per_prompt, -1
             )
 
-        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
             unscale_lora_layers(self.text_encoder_2)
diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
index 2ab3bf00c8fc..54ba178846f4 100644
--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py
@@ -28,6 +28,7 @@
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     PIL_INTERPOLATION,
+    USE_PEFT_BACKEND,
     BaseOutput,
     deprecate,
     logging,
@@ -298,7 +299,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -426,7 +427,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
index b32c852481ab..eb73302f8121 100644
--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
@@ -31,7 +31,14 @@
 )
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import PIL_INTERPOLATION, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    PIL_INTERPOLATION,
+    USE_PEFT_BACKEND,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
@@ -283,7 +290,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
                 adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale)
             else:
@@ -425,7 +432,7 @@ def encode_prompt(
                 bs_embed * num_images_per_prompt, -1
             )
 
-        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
             unscale_lora_layers(self.text_encoder_2)
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
index 42c00597beee..83c31596940e 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
@@ -23,7 +23,14 @@
 from ...models import AutoencoderKL, UNet3DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import TextToVideoSDPipelineOutput
@@ -224,7 +231,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -352,7 +359,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
index c571d3d6bc5e..0d886cb00677 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
@@ -24,7 +24,14 @@
 from ...models import AutoencoderKL, UNet3DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    logging,
+    replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import TextToVideoSDPipelineOutput
@@ -286,7 +293,7 @@ def encode_prompt(
             self._lora_scale = lora_scale
 
             # dynamically adjust the LoRA scale
-            if not self.use_peft_backend:
+            if not USE_PEFT_BACKEND:
                 adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
             else:
                 scale_lora_layers(self.text_encoder, lora_scale)
@@ -414,7 +421,7 @@ def encode_prompt(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and self.use_peft_backend:
+        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder)
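
All of these import hunks route the same module-level flag through `...utils`, so downstream (and user) code can branch on it without touching a class attribute. For example, gating named-adapter usage on the backend (paths are placeholders):

    from diffusers.utils import USE_PEFT_BACKEND

    if USE_PEFT_BACKEND:
        # PEFT backend: adapters are named and can be toggled or fused later.
        pipe.load_lora_weights("path/to/lora", adapter_name="style")
    else:
        # Legacy backend: the old attention-processor LoRA path, single adapter.
        pipe.load_lora_weights("path/to/lora")
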
lora_scale) @@ -554,7 +554,7 @@ def encode_prompt( negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - if isinstance(self, LoraLoaderMixin) and self.use_peft_backend: + if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers unscale_lora_layers(self.text_encoder) diff --git a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py index a7c8ba7783de..2ed3deeb1225 100644 --- a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +++ b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py @@ -31,7 +31,7 @@ ) from ...models.transformer_2d import Transformer2DModel from ...models.unet_2d_condition import UNet2DConditionOutput -from ...utils import is_torch_version, logging, scale_lora_layers, unscale_lora_layers +from ...utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import apply_freeu @@ -1211,7 +1211,7 @@ def forward( # 3. down lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0 - if self.use_peft_backend: + if USE_PEFT_BACKEND: # weight the lora layers by setting `lora_scale` for each PEFT layer scale_lora_layers(self, lora_scale) @@ -1313,7 +1313,7 @@ def forward( sample = self.conv_act(sample) sample = self.conv_out(sample) - if self.use_peft_backend: + if USE_PEFT_BACKEND: # remove `lora_scale` from each PEFT layer unscale_lora_layers(self) From 61737cfe1e586c4996e2b4c58910c77984f06845 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 12 Oct 2023 14:56:59 +0000 Subject: [PATCH 117/134] fix few nits --- src/diffusers/loaders.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index a33308fa0c93..5a0474f78657 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -708,7 +708,7 @@ def _unfuse_lora_apply(self, module): def set_adapters( self, adapter_names: Union[List[str], str], - weights: Optional[List[float]] = None, + weights: Optional[Union[List[float], float]]= None, ): """ Sets the adapter layers for the unet. @@ -716,27 +716,25 @@ def set_adapters( Args: adapter_names (`List[str]` or `str`): The names of the adapters to use. - weights (`List[float]`, *optional*): + weights (`Union[List[float], float]`, *optional*): The adapter(s) weights to use with the UNet. If `None`, the weights are set to `1.0` for all the adapters. """ if not USE_PEFT_BACKEND: raise ValueError("PEFT backend is required for `set_adapters()`.") - def process_weights(adapter_names, weights): - if weights is None: - weights = [1.0] * len(adapter_names) - elif isinstance(weights, float): - weights = [weights] - - if len(adapter_names) != len(weights): - raise ValueError( - f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}." 
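For orientation before the follow-up commits: with the PEFT backend, the LoRA scale travels through `cross_attention_kwargs`, is applied by `scale_lora_layers` before the forward pass, and is reverted by `unscale_lora_layers` right after it, as the hunks above show. A minimal usage sketch (the LoRA repository id is a placeholder, not part of this PR):

    import torch
    from diffusers import DiffusionPipeline

    pipe = DiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")
    pipe.load_lora_weights("some-user/some-lora")  # placeholder repo id

    # The scale below ends up in `lora_scale`; the pipeline scales the PEFT
    # layers before the denoising forward pass and unscales them afterwards.
    image = pipe("a fantasy landscape", cross_attention_kwargs={"scale": 0.7}).images[0]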
From 61737cfe1e586c4996e2b4c58910c77984f06845 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 12 Oct 2023 14:56:59 +0000
Subject: [PATCH 117/134] fix few nits

---
 src/diffusers/loaders.py | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index a33308fa0c93..5a0474f78657 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -708,7 +708,7 @@ def _unfuse_lora_apply(self, module):
     def set_adapters(
         self,
         adapter_names: Union[List[str], str],
-        weights: Optional[List[float]] = None,
+        weights: Optional[Union[List[float], float]]= None,
     ):
         """
         Sets the adapter layers for the unet.
@@ -716,27 +716,25 @@ def set_adapters(
         Args:
             adapter_names (`List[str]` or `str`):
                 The names of the adapters to use.
-            weights (`List[float]`, *optional*):
+            weights (`Union[List[float], float]`, *optional*):
                 The adapter(s) weights to use with the UNet. If `None`, the weights are set to `1.0` for all the
                 adapters.
         """
         if not USE_PEFT_BACKEND:
             raise ValueError("PEFT backend is required for `set_adapters()`.")

-        def process_weights(adapter_names, weights):
-            if weights is None:
-                weights = [1.0] * len(adapter_names)
-            elif isinstance(weights, float):
-                weights = [weights]
-
-            if len(adapter_names) != len(weights):
-                raise ValueError(
-                    f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}."
-                )
-            return weights
-
         adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
-        weights = process_weights(adapter_names, weights)
+
+        if weights is None:
+            weights = [1.0] * len(adapter_names)
+        elif isinstance(weights, float):
+            weights = [weights] * len(adapter_names)
+
+        if len(adapter_names) != len(weights):
+            raise ValueError(
+                f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}."
+            )
+
         set_weights_and_activate_adapters(self, adapter_names, weights)

     def disable_lora(self):

From b2150d9f8059aaec86a3239b8dd1bd3fb720d1d2 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 12 Oct 2023 15:09:47 +0000
Subject: [PATCH 118/134] change a bit offloading logic

---
 src/diffusers/loaders.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index 5a0474f78657..afc7263f8986 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -739,7 +739,7 @@ def set_adapters(

     def disable_lora(self):
         """
-        Disables the LoRA layers for the unet.
+        Disables the active LoRA layers for the unet.
         """
         if not USE_PEFT_BACKEND:
             raise ValueError("PEFT backend is required for this method.")
@@ -747,7 +747,7 @@ def disable_lora(self):

     def enable_lora(self):
         """
-        Enables the LoRA layers for the unet.
+        Enables the active LoRA layers for the unet.
         """
         if not USE_PEFT_BACKEND:
             raise ValueError("PEFT backend is required for this method.")
@@ -1540,8 +1540,11 @@ def _optionally_disable_offloading(cls, _pipeline):
         for _, component in _pipeline.components.items():
             if isinstance(component, nn.Module):
                 if hasattr(component, "_hf_hook"):
-                    is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
-                    is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
+
+                    if not is_model_cpu_offload:
+                        is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
+                    if not is_sequential_cpu_offload:
+                        is_sequential_cpu_offload = isinstance(component._hf_hook, AlignDevicesHook)

                     logger.info(
                         "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
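To make the weight handling from [PATCH 117] concrete: a scalar is now broadcast to every adapter, `None` defaults to `1.0` per adapter, and a mismatched list still raises. A sketch of the resulting behavior (adapter names are illustrative):

    # assumes two adapters were loaded, e.g. via
    # pipe.load_lora_weights(..., adapter_name="toy") and adapter_name="pixel"
    pipe.unet.set_adapters(["toy", "pixel"])                      # None   -> [1.0, 1.0]
    pipe.unet.set_adapters(["toy", "pixel"], weights=0.5)         # float  -> [0.5, 0.5]
    pipe.unet.set_adapters(["toy", "pixel"], weights=[1.0, 0.8])  # explicit per-adapter weights
    pipe.unet.set_adapters(["toy"], weights=[1.0, 0.8])           # ValueError: length mismatch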
From 352118857aad01cee61b73d89a2c177d8e300508 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 12 Oct 2023 15:10:42 +0000
Subject: [PATCH 119/134] check earlier

---
 src/diffusers/loaders.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index afc7263f8986..5f842218518c 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -1540,7 +1540,7 @@ def _optionally_disable_offloading(cls, _pipeline):
         for _, component in _pipeline.components.items():
             if isinstance(component, nn.Module):
                 if hasattr(component, "_hf_hook"):
-
+
                     if not is_model_cpu_offload:
                         is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
                     if not is_sequential_cpu_offload:
@@ -1606,6 +1606,11 @@ def load_lora_into_unet(
         if USE_PEFT_BACKEND and len(state_dict.keys()) > 0:
             from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict

+            if adapter_name in getattr(unet, "peft_config", {}):
+                raise ValueError(
+                    f"Adapter name {adapter_name} already in use in the Unet - please select a new adapter name."
+                )
+
             state_dict = convert_unet_state_dict_to_peft(state_dict)

             if network_alphas is not None:
@@ -1629,11 +1634,6 @@ def load_lora_into_unet(
             # otherwise loading LoRA weights will lead to an error
             is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline)

-            if hasattr(unet, "peft_config") and adapter_name in list(unet.peft_config.keys()):
-                raise ValueError(
-                    f"Adapter name {adapter_name} already in use in the Unet - please select a new adapter name."
-                )
-
             inject_adapter_in_model(lora_config, unet, adapter_name=adapter_name)
             incompatible_keys = set_peft_model_state_dict(unet, state_dict, adapter_name)

From d03d1a3892f3f6b370f9827f4eca8d4d69f396c5 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 12 Oct 2023 15:15:49 +0000
Subject: [PATCH 120/134] rm unneeded block

---
 src/diffusers/loaders.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index 5f842218518c..fc42b5a23c0a 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -1645,9 +1645,6 @@ def load_lora_into_unet(
                         f"Loading adapter weights from state_dict led to unexpected keys not found in the model: "
                         f" {unexpected_keys}. "
                     )
-            elif unexpected_keys is not None:
-                # At this point all LoRA layars has been loaded so we init back an empty state_dict
-                state_dict = {}

             # Offload back.
             if is_model_cpu_offload:

From fabb521cbd471dd18595976c0fb3b4df93316edf Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 12 Oct 2023 15:16:20 +0000
Subject: [PATCH 121/134] break long line

---
 src/diffusers/loaders.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index fc42b5a23c0a..d7bb6321441a 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -2241,8 +2241,8 @@ def unload_lora_weights(self):
         if not USE_PEFT_BACKEND:
             if version.parse(__version__) > version.parse("0.23"):
                 logger.warn(
-                    "You are using `unload_lora_weights` to disable and unload lora weights. If you want to iteratively enable and disable adapter weights, you can use `pipe.enable_lora()` or `pipe.disable_lora()`. After"
-                    " installing the latest version of PEFT."
+                    "You are using `unload_lora_weights` to disable and unload lora weights. If you want to iteratively enable and disable adapter weights, "
+                    "you can use `pipe.enable_lora()` or `pipe.disable_lora()` after installing the latest version of PEFT."
                 )

         for _, module in self.unet.named_modules():
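The effect of moving the duplicate-name check ahead of the state-dict conversion in [PATCH 119] is that a name collision now fails before any work is done. A sketch (repository ids are hypothetical):

    pipe.load_lora_weights("user/lora-a", adapter_name="style")
    # A second load under the same name now raises immediately:
    #   ValueError: Adapter name style already in use in the Unet - please select a new adapter name.
    pipe.load_lora_weights("user/lora-b", adapter_name="style_2")  # pick a fresh name instead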
From fc55a1a6ea396257f2aeae3409754d1e91729151 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 12 Oct 2023 15:17:46 +0000
Subject: [PATCH 122/134] return empty list

---
 src/diffusers/loaders.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index d7bb6321441a..e5d75d4108f6 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -2502,7 +2502,7 @@ def enable_lora(self):
         if hasattr(self, "text_encoder_2"):
             self.enable_lora_for_text_encoder(self.text_encoder_2)

-    def get_active_adapters(self) -> Optional[List[str]]:
+    def get_active_adapters(self) -> List[str]:
         """
         Gets the list of the current active adapters.
@@ -2525,7 +2525,7 @@ def get_active_adapters(self) -> List[str]:

         from peft.tuners.tuners_utils import BaseTunerLayer

-        active_adapters = None
+        active_adapters = []

         for module in self.unet.modules():
             if isinstance(module, BaseTunerLayer):

From 7106b22404bf6cd6ef551829323991c3b9fd9d84 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 12 Oct 2023 15:19:27 +0000
Subject: [PATCH 123/134] change logic a bit and address comments

---
 src/diffusers/loaders.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index e5d75d4108f6..c9b5f1fb972c 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -2561,8 +2561,6 @@ def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None:
         Sets the LoRAs listed in `adapter_names` into a target device. Useful for offloading the LoRA in the CPU in
         case you want to load multiple adapters and free some GPU memory.

-        This is specific to LoRA and needs some tweaking for new adapter architectures (IA3, AdaLora).
-
         Args:
             adapter_names (`List[str]`):
                 List of adapters to send device to.
@@ -2577,11 +2575,8 @@ def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None:
         # Handle the UNET
         for unet_module in self.unet.modules():
             if isinstance(unet_module, BaseTunerLayer):
-                # loop over submodules
-                for name, unet_submodule in unet_module.named_children():
-                    if name in ["lora_A", "lora_B"]:
-                        for adapter_name in adapter_names:
-                            unet_submodule[adapter_name].to(device)
+                unet_module.lora_A[adapter_name].to(device)
+                unet_module.lora_B[adapter_name].to(device)

         # Handle the text encoder
         modules_to_process = []
@@ -2595,11 +2590,8 @@ def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None:
         # loop over submodules
         for text_encoder_module in text_encoder.modules():
             if isinstance(text_encoder_module, BaseTunerLayer):
-                # loop over submodules
-                for name, text_encoder_submodule in text_encoder_module.named_children():
-                    if name in ["lora_A", "lora_B"]:
-                        for adapter_name in adapter_names:
-                            text_encoder_submodule[adapter_name].to(device)
+                text_encoder_module.lora_A[adapter_name].to(device)
+                text_encoder_module.lora_B[adapter_name].to(device)
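Taken together, [PATCH 122] and [PATCH 123] give the following user-level behavior (a sketch; the adapter name and repository id are illustrative):

    pipe.get_active_adapters()   # now [] instead of None when no adapter is loaded
    pipe.load_lora_weights("user/lora-a", adapter_name="style")
    pipe.get_active_adapters()   # ["style"]

    # Move a LoRA's A/B matrices without touching the base model weights:
    pipe.set_lora_device(["style"], device="cpu")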
From 1834f8e7df1ea130da4dacfa4bf900042674417b Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 12 Oct 2023 15:20:22 +0000
Subject: [PATCH 124/134] add typehint

---
 src/diffusers/loaders.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index c9b5f1fb972c..060fcc675dec 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -3178,7 +3178,7 @@ class StableDiffusionXLLoraLoaderMixin(LoraLoaderMixin):

     # Overrride to properly handle the loading and unloading of the additional text encoder.
     def load_lora_weights(
-        self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], adapter_name=None, **kwargs
+        self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], adapter_name: Optional[str] = None, **kwargs
     ):
         """
         Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.unet` and

From 21a8d6c2ac362f3148b86c407e9e8ca968c4b086 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 12 Oct 2023 15:21:09 +0000
Subject: [PATCH 125/134] remove parenthesis

---
 src/diffusers/models/modeling_utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py
index c889469030ba..7639f75152a5 100644
--- a/src/diffusers/models/modeling_utils.py
+++ b/src/diffusers/models/modeling_utils.py
@@ -298,8 +298,7 @@ def disable_xformers_memory_efficient_attention(self):
     def add_adapter(self, adapter_config, adapter_name: str = "default") -> None:
         r"""
         Adds a new adapter to the current model for training. If no adapter name is passed, a default name is assigned
-        to the adapter to follow the convention of the PEFT library (in PEFT we use `"default"` as the default adapter
-        name).
+        to the adapter to follow the convention of the PEFT library.

         If you are not familiar with adapters and PEFT methods, we invite you to read more about them in the PEFT
         [documentation](https://huggingface.co/docs/peft).
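For reference, the `add_adapter` convention described in the docstring above looks like this in practice (the LoRA config values are illustrative, not prescribed by this PR):

    from peft import LoraConfig

    unet_lora_config = LoraConfig(
        r=4, lora_alpha=4, target_modules=["to_k", "to_q", "to_v", "to_out.0"]
    )
    pipe.unet.add_adapter(unet_lora_config)                         # registered as "default"
    pipe.unet.add_adapter(unet_lora_config, adapter_name="second")  # explicit name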
From 44f658da803ad81720168eb78ad632128f8b17d6 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 12 Oct 2023 15:24:23 +0000
Subject: [PATCH 126/134] fix

---
 src/diffusers/loaders.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index 060fcc675dec..a77619bbc2a6 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -708,7 +708,7 @@ def _unfuse_lora_apply(self, module):
     def set_adapters(
         self,
         adapter_names: Union[List[str], str],
-        weights: Optional[Union[List[float], float]]= None,
+        weights: Optional[Union[List[float], float]] = None,
     ):
         """
         Sets the adapter layers for the unet.
@@ -729,12 +729,12 @@ def set_adapters(
             weights = [1.0] * len(adapter_names)
         elif isinstance(weights, float):
             weights = [weights] * len(adapter_names)
-
+
         if len(adapter_names) != len(weights):
             raise ValueError(
                 f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}."
             )
-
+
         set_weights_and_activate_adapters(self, adapter_names, weights)

     def disable_lora(self):
@@ -1540,7 +1540,6 @@ def _optionally_disable_offloading(cls, _pipeline):
         for _, component in _pipeline.components.items():
             if isinstance(component, nn.Module):
                 if hasattr(component, "_hf_hook"):
-
                     if not is_model_cpu_offload:
                         is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
                     if not is_sequential_cpu_offload:
@@ -2575,8 +2574,9 @@ def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None:
         # Handle the UNET
         for unet_module in self.unet.modules():
             if isinstance(unet_module, BaseTunerLayer):
-                unet_module.lora_A[adapter_name].to(device)
-                unet_module.lora_B[adapter_name].to(device)
+                for adapter_name in adapter_names:
+                    unet_module.lora_A[adapter_name].to(device)
+                    unet_module.lora_B[adapter_name].to(device)

         # Handle the text encoder
         modules_to_process = []
@@ -2590,8 +2590,9 @@ def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None:
         # loop over submodules
         for text_encoder_module in text_encoder.modules():
             if isinstance(text_encoder_module, BaseTunerLayer):
-                text_encoder_module.lora_A[adapter_name].to(device)
-                text_encoder_module.lora_B[adapter_name].to(device)
+                for adapter_name in adapter_names:
+                    text_encoder_module.lora_A[adapter_name].to(device)
+                    text_encoder_module.lora_B[adapter_name].to(device)

 class FromSingleFileMixin:
@@ -3178,7 +3179,10 @@ class StableDiffusionXLLoraLoaderMixin(LoraLoaderMixin):

     # Overrride to properly handle the loading and unloading of the additional text encoder.
     def load_lora_weights(
-        self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], adapter_name: Optional[str] = None, **kwargs
+        self,
+        pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
+        adapter_name: Optional[str] = None,
+        **kwargs,
     ):
         """
         Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.unet` and
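The hook bookkeeping that [PATCH 126] tidies up supports this user-facing sequence (model and LoRA ids taken from the tests later in this series):

    pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    )
    pipe.enable_model_cpu_offload()  # installs CpuOffload hooks on the components

    # load_lora_weights() detects the accelerate hooks, removes them so the
    # LoRA parameters can be placed correctly, then re-applies the same
    # offloading mode afterwards.
    pipe.load_lora_weights(
        "hf-internal-testing/sdxl-1.0-lora", weight_name="sd_xl_offset_example-lora_1.0.safetensors"
    )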
From 7fd50a7a15ace1db8ff67465e2109b672903daad Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 12 Oct 2023 16:24:00 +0000
Subject: [PATCH 127/134] revert to fp16 in tests

---
 tests/lora/test_lora_layers_old_backend.py | 6 ++----
 tests/lora/test_lora_layers_peft.py        | 6 ++----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py
index bb085ecb505f..fdbbeeea5941 100644
--- a/tests/lora/test_lora_layers_old_backend.py
+++ b/tests/lora/test_lora_layers_old_backend.py
@@ -2301,15 +2301,13 @@ def test_sdxl_1_0_last_ben(self):
         self.assertTrue(np.allclose(images, expected, atol=1e-3))

     def test_sdxl_1_0_fuse_unfuse_all(self):
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16
-        )
+        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
         text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
         text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
         unet_sd = copy.deepcopy(pipe.unet.state_dict())

         pipe.load_lora_weights(
-            "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.bfloat16
+            "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.float16
         )

         fused_te_state_dict = pipe.text_encoder.state_dict()
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 0abd5f8fa1e1..198ff53340c8 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -1649,15 +1649,13 @@ def test_sdxl_1_0_last_ben(self):
         release_memory(pipe)

     def test_sdxl_1_0_fuse_unfuse_all(self):
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16
-        )
+        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
         text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
         text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
         unet_sd = copy.deepcopy(pipe.unet.state_dict())

         pipe.load_lora_weights(
-            "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.bfloat16
+            "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.float16
         )

         fused_te_state_dict = pipe.text_encoder.state_dict()

From e92c6def15d0400093212e840bf3a8fbd854c091 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 12 Oct 2023 16:27:38 +0000
Subject: [PATCH 128/134] add to gpu

---
 tests/lora/test_lora_layers_old_backend.py | 4 +++-
 tests/lora/test_lora_layers_peft.py        | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py
index fdbbeeea5941..69158120917d 100644
--- a/tests/lora/test_lora_layers_old_backend.py
+++ b/tests/lora/test_lora_layers_old_backend.py
@@ -2301,7 +2301,9 @@ def test_sdxl_1_0_last_ben(self):
         self.assertTrue(np.allclose(images, expected, atol=1e-3))

     def test_sdxl_1_0_fuse_unfuse_all(self):
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
+        pipe = DiffusionPipeline.from_pretrained(
+            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
+        ).to(torch_device)
         text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
         text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
         unet_sd = copy.deepcopy(pipe.unet.state_dict())
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 198ff53340c8..fed06925ab63 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -1649,7 +1649,9 @@ def test_sdxl_1_0_last_ben(self):
         release_memory(pipe)

     def test_sdxl_1_0_fuse_unfuse_all(self):
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
+        pipe = DiffusionPipeline.from_pretrained(
+            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
+        ).to(torch_device)
         text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
         text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
         unet_sd = copy.deepcopy(pipe.unet.state_dict())
From 4e382ee3c71606166e1ee747fe978b45b93b98da Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Fri, 13 Oct 2023 10:24:25 +0000
Subject: [PATCH 129/134] revert to old test

---
 tests/lora/test_lora_layers_old_backend.py | 2 +-
 tests/lora/test_lora_layers_peft.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py
index 69158120917d..bd4e29c54f94 100644
--- a/tests/lora/test_lora_layers_old_backend.py
+++ b/tests/lora/test_lora_layers_old_backend.py
@@ -2303,7 +2303,7 @@ def test_sdxl_1_0_last_ben(self):
     def test_sdxl_1_0_fuse_unfuse_all(self):
         pipe = DiffusionPipeline.from_pretrained(
             "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
-        ).to(torch_device)
+        )
         text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
         text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
         unet_sd = copy.deepcopy(pipe.unet.state_dict())
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index fed06925ab63..40454f005a50 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -1651,7 +1651,7 @@ def test_sdxl_1_0_last_ben(self):
     def test_sdxl_1_0_fuse_unfuse_all(self):
         pipe = DiffusionPipeline.from_pretrained(
             "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
-        ).to(torch_device)
+        )
         text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
         text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
         unet_sd = copy.deepcopy(pipe.unet.state_dict())

From 6ae767fc8dc8ad40463370ded02e116c011d3376 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Fri, 13 Oct 2023 10:25:57 +0000
Subject: [PATCH 130/134] style

---
 tests/lora/test_lora_layers_old_backend.py | 4 +---
 tests/lora/test_lora_layers_peft.py        | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py
index bd4e29c54f94..fdbbeeea5941 100644
--- a/tests/lora/test_lora_layers_old_backend.py
+++ b/tests/lora/test_lora_layers_old_backend.py
@@ -2301,9 +2301,7 @@ def test_sdxl_1_0_last_ben(self):
         self.assertTrue(np.allclose(images, expected, atol=1e-3))

     def test_sdxl_1_0_fuse_unfuse_all(self):
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
-        )
+        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
         text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
         text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
         unet_sd = copy.deepcopy(pipe.unet.state_dict())
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 40454f005a50..198ff53340c8 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -1649,9 +1649,7 @@ def test_sdxl_1_0_last_ben(self):
         release_memory(pipe)

     def test_sdxl_1_0_fuse_unfuse_all(self):
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
-        )
+        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
         text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
         text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
         unet_sd = copy.deepcopy(pipe.unet.state_dict())
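The round-trip exercised by `test_sdxl_1_0_fuse_unfuse_all` above corresponds to this user-level sequence (checkpoint and weight names taken from the test itself):

    pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    )
    pipe.load_lora_weights("davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors")

    pipe.fuse_lora()            # merge the LoRA deltas into the base weights
    pipe.unload_lora_weights()  # drop the standalone LoRA modules
    pipe.unfuse_lora()          # restore the original base weights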
From b4e13811cb093c0393980c05aa27ff8318a15f35 Mon Sep 17 00:00:00 2001
From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
Date: Fri, 13 Oct 2023 12:45:00 +0200
Subject: [PATCH 131/134] Update src/diffusers/loaders.py

Co-authored-by: Benjamin Bossan
---
 src/diffusers/loaders.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index a77619bbc2a6..ff8a486152b6 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -2557,7 +2557,7 @@ def get_list_adapters(self) -> Dict[str, List[str]]:

     def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None:
         """
-        Sets the LoRAs listed in `adapter_names` into a target device. Useful for offloading the LoRA in the CPU in
+        Moves the LoRAs listed in `adapter_names` to a target device. Useful for offloading the LoRA to the CPU in
         case you want to load multiple adapters and free some GPU memory.

         Args:
From f708dba464144d9152d62c1c4620b816eefe8307 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Fri, 13 Oct 2023 10:46:48 +0000
Subject: [PATCH 132/134] change indent

---
 src/diffusers/loaders.py | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py
index ff8a486152b6..695a22d955da 100644
--- a/src/diffusers/loaders.py
+++ b/src/diffusers/loaders.py
@@ -533,15 +533,14 @@ def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs):
         if not USE_PEFT_BACKEND:
             if _pipeline is not None:
                 for _, component in _pipeline.components.items():
-                    if isinstance(component, nn.Module):
-                        if hasattr(component, "_hf_hook"):
-                            is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
-                            is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)
+                    if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
+                        is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload)
+                        is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook)

-                            logger.info(
-                                "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
-                            )
-                            remove_hook_from_module(component, recurse=is_sequential_cpu_offload)
+                        logger.info(
+                            "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
+                        )
+                        remove_hook_from_module(component, recurse=is_sequential_cpu_offload)

         # only custom diffusion needs to set attn processors
         if is_custom_diffusion:
@@ -1538,17 +1537,16 @@ def _optionally_disable_offloading(cls, _pipeline):

         if _pipeline is not None:
             for _, component in _pipeline.components.items():
-                if isinstance(component, nn.Module):
-                    if hasattr(component, "_hf_hook"):
-                        if not is_model_cpu_offload:
-                            is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
-                        if not is_sequential_cpu_offload:
-                            is_sequential_cpu_offload = isinstance(component._hf_hook, AlignDevicesHook)
+                if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
+                    if not is_model_cpu_offload:
+                        is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
+                    if not is_sequential_cpu_offload:
+                        is_sequential_cpu_offload = isinstance(component._hf_hook, AlignDevicesHook)

-                        logger.info(
-                            "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
-                        )
-                        remove_hook_from_module(component, recurse=is_sequential_cpu_offload)
+                    logger.info(
+                        "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
+                    )
+                    remove_hook_from_module(component, recurse=is_sequential_cpu_offload)

         return (is_model_cpu_offload, is_sequential_cpu_offload)
@@ -2555,8 +2555,8 @@ def get_list_adapters(self) -> Dict[str, List[str]]:

     def set_lora_device(self, adapter_names: List[str], device: Union[torch.device, str, int]) -> None:
         """
-        Moves the LoRAs listed in `adapter_names` to a target device. Useful for offloading the LoRA to the CPU in
-        case you want to load multiple adapters and free some GPU memory.
+        Moves the LoRAs listed in `adapter_names` to a target device. Useful for offloading the LoRA to the CPU in case
+        you want to load multiple adapters and free some GPU memory.

         Args:

From f17206c014d4acd85ce1d17f29dc466a2f355d58 Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Fri, 13 Oct 2023 16:21:38 +0200
Subject: [PATCH 133/134] Apply suggestions from code review

---
 tests/lora/test_lora_layers_old_backend.py | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py
index 5d8d0baca7df..bc70fcb4f2fd 100644
--- a/tests/lora/test_lora_layers_old_backend.py
+++ b/tests/lora/test_lora_layers_old_backend.py
@@ -2320,22 +2320,13 @@ def test_sdxl_1_0_fuse_unfuse_all(self):
         pipe.load_lora_weights(
             "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.float16
         )
-
-        fused_te_state_dict = pipe.text_encoder.state_dict()
-        fused_te_2_state_dict = pipe.text_encoder_2.state_dict()
-        unet_state_dict = pipe.unet.state_dict()
-
-        for key, value in text_encoder_1_sd.items():
-            self.assertTrue(torch.allclose(fused_te_state_dict[key], value))
-
-        for key, value in text_encoder_2_sd.items():
-            self.assertTrue(torch.allclose(fused_te_2_state_dict[key], value))
-
-        for key, value in unet_state_dict.items():
-            self.assertTrue(torch.allclose(unet_state_dict[key], value))
-
         pipe.fuse_lora()
         pipe.unload_lora_weights()
+        pipe.unfuse_lora()
+
+        assert state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict())
+        assert state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict())
+        assert state_dicts_almost_equal(unet_sd, pipe.unet.state_dict())

         assert not state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict())
         assert not state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict())

From 950d19c197c4116ab345803eedd0c3365f57167b Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Fri, 13 Oct 2023 16:22:14 +0200
Subject: [PATCH 134/134] Apply suggestions from code review

---
 tests/lora/test_lora_layers_old_backend.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py
index bc70fcb4f2fd..047cdddfa95a 100644
--- a/tests/lora/test_lora_layers_old_backend.py
+++ b/tests/lora/test_lora_layers_old_backend.py
@@ -2328,10 +2328,6 @@ def test_sdxl_1_0_fuse_unfuse_all(self):
         assert state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict())
         assert state_dicts_almost_equal(unet_sd, pipe.unet.state_dict())

-        assert not state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict())
-        assert not state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict())
-        assert not state_dicts_almost_equal(unet_sd, pipe.unet.state_dict())
-
     def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self):
         generator = torch.Generator().manual_seed(0)

         pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
         pipe.enable_sequential_cpu_offload()
         lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
         lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
-
         pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)

         images = pipe(