From 8404d7b02dc9b0a94571761748c5cdaa9afedf47 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Fri, 15 Mar 2024 18:22:12 +0100
Subject: [PATCH 01/25] Initial commit
---
Pipfile | 11 ++++
docs/source/en/_toctree.yml | 8 +--
src/diffusers/loaders/lora.py | 18 ++++--
src/diffusers/loaders/unet.py | 95 +++++++++++++++++++++++++++++++
src/diffusers/utils/peft_utils.py | 11 +++-
5 files changed, 133 insertions(+), 10 deletions(-)
create mode 100644 Pipfile
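
For orientation, a minimal usage sketch of the block-wise LoRA scaling this series builds toward (the model id, LoRA path and adapter name below are placeholders; the exact dict shape is refined in later commits, e.g. the UNet scales are nested under a `unet` key from PATCH 07/25 on):

```python
# Hypothetical usage sketch only; model id, LoRA path and adapter name are placeholders.
import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipe.load_lora_weights("path/to/lora.safetensors", adapter_name="my_adapter")

# One scale per pipeline part; "down"/"up" can be refined per block and per transformer.
scales = {
    "text_encoder": 0.5,
    "down": 0.9,                      # every transformer in the down blocks
    "mid": 1.0,                       # the mid-block transformer
    "up": {
        "block_0": 0.6,               # every transformer in up block 0
        "block_1": [0.4, 0.8, 1.0],   # one scale per transformer in up block 1
    },
}
pipe.set_adapters(["my_adapter"], [scales])
image = pipe("a photo of an astronaut riding a horse on mars").images[0]
```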
diff --git a/Pipfile b/Pipfile
new file mode 100644
index 000000000000..0757494bb360
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,11 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+
+[dev-packages]
+
+[requires]
+python_version = "3.11"
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 15f4f460458e..29bd65fb4dba 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -400,14 +400,14 @@
title: DPMSolverSDEScheduler
- local: api/schedulers/singlestep_dpm_solver
title: DPMSolverSinglestepScheduler
+ - local: api/schedulers/edm_multistep_dpm_solver
+ title: EDMDPMSolverMultistepScheduler
+ - local: api/schedulers/edm_euler
+ title: EDMEulerScheduler
- local: api/schedulers/euler_ancestral
title: EulerAncestralDiscreteScheduler
- local: api/schedulers/euler
title: EulerDiscreteScheduler
- - local: api/schedulers/edm_euler
- title: EDMEulerScheduler
- - local: api/schedulers/edm_multistep_dpm_solver
- title: EDMDPMSolverMultistepScheduler
- local: api/schedulers/heun
title: HeunDiscreteScheduler
- local: api/schedulers/ipndm
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index c6077f3a8e25..c6336b62a07c 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -1033,17 +1033,27 @@ def enable_lora_for_text_encoder(self, text_encoder: Optional["PreTrainedModel"]
def set_adapters(
self,
adapter_names: Union[List[str], str],
- adapter_weights: Optional[List[float]] = None,
+ adapter_weights: Optional[Union[List[float], List[Dict]]] = None,
):
+ unet_weights, text_encoder_weights = [], []
+
+ for weights in adapter_weights:
+ if isinstance(weights, dict):
+ unet_weights.append({k: v for k, v in weights.items() if k != "text_encoder"})
+ text_encoder_weights.append(weights["text_encoder"])
+ else:
+ unet_weights.append(weights)
+ text_encoder_weights.append(weights)
+
unet = getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet
# Handle the UNET
- unet.set_adapters(adapter_names, adapter_weights)
+ unet.set_adapters(adapter_names, unet_weights)
# Handle the Text Encoder
if hasattr(self, "text_encoder"):
- self.set_adapters_for_text_encoder(adapter_names, self.text_encoder, adapter_weights)
+ self.set_adapters_for_text_encoder(adapter_names, self.text_encoder, text_encoder_weights)
if hasattr(self, "text_encoder_2"):
- self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2, adapter_weights)
+ self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2, text_encoder_weights)
def disable_lora(self):
if not USE_PEFT_BACKEND:
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index 0a9544d0dbbe..1248cc769ba6 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -561,6 +561,88 @@ def _unfuse_lora_apply(self, module):
if isinstance(module, BaseTunerLayer):
module.unmerge()
+ @staticmethod
+ def _expand_lora_weight_dict(
+ weights, blocks_with_transformer: Dict[str, int], transformer_per_block: Dict[str, int]
+ ):
+ """
+ Expand input into a weight dict with a weight per transformer
+
+ Parameters:
+ blocks_with_transformer (`Dict[str, int]`):
+ Dict with keys 'up' and 'down', showing which blocks have transformer layers
+ transformer_per_block (`Dict[str, int]`):
+ Dict with keys 'up' and 'down', showing how many transformer layers each block has
+
+ E.g. turns
+ {
+ 'down': 2,
+ 'mid': 3,
+ 'up': {
+ 'block_1': 4,
+ 'block_2': [5, 6, 7]
+ }
+ }
+ into
+ {
+ 'down.block_1.0': 2,
+ 'down.block_1.1': 2,
+ 'down.block_2.0': 2,
+ 'down.block_2.1': 2,
+ 'mid': 3,
+ 'up.block_0.0': 4,
+ 'up.block_0.1': 4,
+ 'up.block_0.2': 4,
+ 'up.block_1.0': 5,
+ 'up.block_1.1': 6,
+ 'up.block_1.2': 7,
+ }
+ """
+ number = (float, int)
+
+ if sorted(blocks_with_transformer.keys()) != ["down", "up"]:
+ raise ValueError("blocks_with_transformer needs to be a dict with keys `'down' and `'up'`")
+
+ if sorted(transformer_per_block.keys()) != ["down", "up"]:
+ raise ValueError("transformer_per_block needs to be a dict with keys `'down' and `'up'`")
+
+ if isinstance(weights, number):
+ weights = {o: weights for o in ["down", "mid", "up"]}
+
+ for updown in ["up", "down"]:
+ # eg {"down": 1} to {"down": {"block_1": 1, "block_2": 1}}}
+ if isinstance(weights[updown], number):
+ weights[updown] = {f"block_{i}": weights[updown] for i in blocks_with_transformer[updown]}
+
+ # eg {"down": "block_1": 1}} to {"down": "block_1": [1, 1]}}
+ for i in blocks_with_transformer[updown]:
+ block = f"block_{i}"
+ if isinstance(weights[updown][block], number):
+ weights[updown][block] = [weights[updown][block] for _ in range(transformer_per_block[updown])]
+
+ # eg {"down": "block_1": [1, 1]}} to {"down.block_1.0": 1, "down.block_1.1": 1}
+ for i in blocks_with_transformer[updown]:
+ block = f"block_{i}"
+ for tf_idx, value in enumerate(weights[updown][block]):
+ weights[f"{updown}.{block}.{tf_idx}"] = value
+
+ del weights[updown]
+
+ def layer_name(name):
+ """Translate user-friendly name (e.g. 'mid') into actual layer name (e.g. 'mid_block.attentions.0')"""
+ if name == "mid":
+ return "mid_block.attentions.0"
+
+ updown, block, attn = name.split(".")
+
+ updown = updown.replace("down", "down_blocks").replace("up", "up_blocks")
+ block = block.replace("block_", "")
+ attn = "attentions." + attn
+
+ return ".".join((updown, block, attn))
+
+ return {layer_name(name): weight for name, weight in weights}
+
def set_adapters(
self,
adapter_names: Union[List[str], str],
@@ -607,6 +689,19 @@ def set_adapters(
f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}."
)
+ blocks_with_transformer = {
+ "down": [i for i, block in enumerate(self.down_blocks) if hasattr(block, "attentions")],
+ "up": [i for i, block in enumerate(self.up_blocks) if hasattr(block, "attentions")],
+ }
+ transformer_per_block = {"down": self.config.layers_per_block, "up": self.config.layers_per_block + 1}
+
+ weights = [
+ UNet2DConditionLoadersMixin._expand_lora_weight_dict(
+ weight_for_adapter, blocks_with_transformer, transformer_per_block
+ )
+ for weight_for_adapter in weights
+ ]
+
set_weights_and_activate_adapters(self, adapter_names, weights)
def disable_lora(self):
diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py
index 85d16c7b5821..3acda5a2ab6a 100644
--- a/src/diffusers/utils/peft_utils.py
+++ b/src/diffusers/utils/peft_utils.py
@@ -227,16 +227,23 @@ def delete_adapter_layers(model, adapter_name):
def set_weights_and_activate_adapters(model, adapter_names, weights):
from peft.tuners.tuners_utils import BaseTunerLayer
+ def get_module_weight(weight_for_adapter, module_name):
+ for layer_name, weight_ in weight_for_adapter.items():
+ if layer_name in module_name:
+ print(f"Setting weight to {weight_} as {layer_name} in {module_name}")
+ return weight_
+ raise RuntimeError(f"No LoRA weight found for module {module_name}, which should never happen.")
+
# iterate over each adapter, make it active and set the corresponding scaling weight
for adapter_name, weight in zip(adapter_names, weights):
- for module in model.modules():
+ for module_name, module in model.named_modules():
if isinstance(module, BaseTunerLayer):
# For backward compatibility with previous PEFT versions
if hasattr(module, "set_adapter"):
module.set_adapter(adapter_name)
else:
module.active_adapter = adapter_name
- module.set_scale(adapter_name, weight)
+ module.set_scale(adapter_name, get_module_weight(weight, module_name))
# set multiple active adapters
for module in model.modules():
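
The `get_module_weight` helper above resolves a per-module scale by substring matching against the expanded layer names produced by `_expand_lora_weight_dict`. A small standalone sketch of that lookup, with made-up module names:

```python
# Illustrative only: expanded scales keyed by "<block>.attentions.<idx>" prefixes.
expanded_scales = {
    "down_blocks.1.attentions.0": 0.9,
    "mid_block.attentions.0": 1.0,
    "up_blocks.1.attentions.2": 0.4,
}

def get_module_weight(weight_for_adapter, module_name):
    # Each key is a substring of the full module path, so containment is enough
    # to resolve the scale for a given LoRA layer.
    for layer_name, weight in weight_for_adapter.items():
        if layer_name in module_name:
            return weight
    raise RuntimeError(f"No LoRA weight found for module {module_name}.")

# A made-up module name of the shape model.named_modules() would yield:
print(get_module_weight(expanded_scales, "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q"))
# 0.4
```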
From 84125dfcf83a11af8665a47a3062e14b2c00f034 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Sat, 16 Mar 2024 01:17:45 +0100
Subject: [PATCH 02/25] Implemented block lora
- implemented block lora
- updated docs
- added tests
---
.../en/using-diffusers/loading_adapters.md | 15 ++
src/diffusers/loaders/lora.py | 34 ++-
src/diffusers/loaders/unet.py | 44 ++--
src/diffusers/utils/peft_utils.py | 1 -
tests/lora/test_lora_layers_peft.py | 207 ++++++++++++++++++
5 files changed, 274 insertions(+), 27 deletions(-)
diff --git a/docs/source/en/using-diffusers/loading_adapters.md b/docs/source/en/using-diffusers/loading_adapters.md
index b59b46aeba51..3b9231ec0c60 100644
--- a/docs/source/en/using-diffusers/loading_adapters.md
+++ b/docs/source/en/using-diffusers/loading_adapters.md
@@ -157,6 +157,21 @@ image
For both [`~loaders.LoraLoaderMixin.load_lora_weights`] and [`~loaders.UNet2DConditionLoadersMixin.load_attn_procs`], you can pass the `cross_attention_kwargs={"scale": 0.5}` parameter to adjust how much of the LoRA weights to use. A value of `0` is the same as only using the base model weights, and a value of `1` is equivalent to using the fully finetuned LoRA.
+For fine-grained control on how much of the LoRA weights are used, use [`~loaders.LoraLoaderMixin.set_adapters`]. Here, you can define scale of any granularity up to per-transformer.
+```python
+pipe = ... # create pipeline
+pipe.load_lora_weights(..., adapter_name="my_adapter")
+scales = {
+ "text_encoder": 0.5,
+ "down": 0.9, # all transformers in the down-part will use scale 0.9
+ # "mid" # because "mid" is not given, all transformers in the mid part will use the default scale 1.0
+ "up": {
+ "block_0": 0.6, # all 3 transformers in the 0th block in the up-part will use scale 0.6
+ "block_1": [0.4, 0.8, 1.0], # the 3 transformers in the 1st block in the up-part will use scales 0.4m 0.8 and 1.0 respectively
+ }
+}
+pipe.load_lora_weights("my_adapter", scales)
+```
To unload the LoRA weights, use the [`~loaders.LoraLoaderMixin.unload_lora_weights`] method to discard the LoRA weights and restore the model to its original weights:
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index c6336b62a07c..f16ca5deee2d 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -959,7 +959,7 @@ def set_adapters_for_text_encoder(
self,
adapter_names: Union[List[str], str],
text_encoder: Optional["PreTrainedModel"] = None, # noqa: F821
- text_encoder_weights: List[float] = None,
+ text_encoder_weights: Optional[Union[float, List[float]]] = None,
):
"""
Sets the adapter layers for the text encoder.
@@ -986,6 +986,9 @@ def process_weights(adapter_names, weights):
raise ValueError(
f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(weights)}"
)
+
+ weights = [{"text_model": w} if w is not None else {"text_model": 1.0} for w in weights]
+
return weights
adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
@@ -1035,15 +1038,28 @@ def set_adapters(
adapter_names: Union[List[str], str],
adapter_weights: Optional[Union[List[float], List[Dict]]] = None,
):
- unet_weights, text_encoder_weights = [], []
+ adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
- for weights in adapter_weights:
- if isinstance(weights, dict):
- unet_weights.append({k: v for k, v in weights.items() if k != "text_encoder"})
- text_encoder_weights.append(weights["text_encoder"])
- else:
- unet_weights.append(weights)
- text_encoder_weights.append(weights)
+ if adapter_weights is None:
+ unet_weights, text_encoder_weights = None, None
+ else:
+ if isinstance(adapter_weights, (float, dict)):
+ adapter_weights = [adapter_weights] * len(adapter_names)
+
+ unet_weights, text_encoder_weights = [], []
+ for weights in adapter_weights:
+ if isinstance(weights, dict):
+ unet_weight = {k: v for k, v in weights.items() if k != "text_encoder"}
+ if len(unet_weight) == 0:
+ unet_weight = None
+ text_encoder_weight = weights.get("text_encoder", None)
+
+ unet_weights.append(unet_weight)
+ text_encoder_weights.append(text_encoder_weight)
+
+ else:
+ unet_weights.append(weights)
+ text_encoder_weights.append(weights)
unet = getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet
# Handle the UNET
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index 1248cc769ba6..841760dce45a 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -561,9 +561,8 @@ def _unfuse_lora_apply(self, module):
if isinstance(module, BaseTunerLayer):
module.unmerge()
- @staticmethod
- def _expand_lora_weight_dict(
- weights, blocks_with_transformer: Dict[str, int], transformer_per_block: Dict[str, int]
+ def _expand_lora_scales_dict(
+ self, scales, blocks_with_transformer: Dict[str, int], transformer_per_block: Dict[str, int]
):
"""
Expand input into a weight dict with a weight per transformer
@@ -606,27 +605,33 @@ def _expand_lora_weight_dict(
if sorted(transformer_per_block.keys()) != ["down", "up"]:
raise ValueError("transformer_per_block needs to be a dict with keys `'down' and `'up'`")
- if isinstance(weights, number):
- weights = {o: weights for o in ["down", "mid", "up"]}
+ if isinstance(scales, number):
+ scales = {o: scales for o in ["down", "mid", "up"]}
+
+ if "mid" not in scales:
+ scales["mid"] = 1
for updown in ["up", "down"]:
+ if updown not in scales:
+ scales[updown] = 1
+
# eg {"down": 1} to {"down": {"block_1": 1, "block_2": 1}}}
- if isinstance(weights[updown], number):
- weights[updown] = {f"block_{i}": weights[updown] for i in blocks_with_transformer[updown]}
+ if isinstance(scales[updown], number):
+ scales[updown] = {f"block_{i}": scales[updown] for i in blocks_with_transformer[updown]}
# eg {"down": "block_1": 1}} to {"down": "block_1": [1, 1]}}
for i in blocks_with_transformer[updown]:
block = f"block_{i}"
- if isinstance(weights[updown][block], number):
- weights[updown][block] = [weights[updown][block] for _ in range(transformer_per_block[updown])]
+ if isinstance(scales[updown][block], number):
+ scales[updown][block] = [scales[updown][block] for _ in range(transformer_per_block[updown])]
# eg {"down": "block_1": [1, 1]}} to {"down.block_1.0": 1, "down.block_1.1": 1}
for i in blocks_with_transformer[updown]:
block = f"block_{i}"
- for tf_idx, value in enumerate(weights[updown][block]):
- weights[f"{updown}.{block}.{tf_idx}"] = value
+ for tf_idx, value in enumerate(scales[updown][block]):
+ scales[f"{updown}.{block}.{tf_idx}"] = value
- del weights[updown]
+ del scales[updown]
def layer_name(name):
"""Translate user-friendly name (e.g. 'mid') into actual layer name (e.g. 'mid_block.attentions.0')"""
@@ -641,7 +646,14 @@ def layer_name(name):
return ".".join((updown, block, attn))
- return {layer_name(name): weight for name, weight in weights}
+ state_dict = self.state_dict()
+ for layer in scales.keys():
+ if not any(layer_name(layer) in module for module in state_dict.keys()):
+ raise ValueError(
+ f"Can't set lora scale for layer {layer}. It either doesn't exist in this unet or has not attentions."
+ )
+
+ return {layer_name(name): weight for name, weight in scales.items()}
def set_adapters(
self,
@@ -681,7 +693,7 @@ def set_adapters(
if weights is None:
weights = [1.0] * len(adapter_names)
- elif isinstance(weights, float):
+ elif isinstance(weights, (float, dict)):
weights = [weights] * len(adapter_names)
if len(adapter_names) != len(weights):
@@ -696,9 +708,7 @@ def set_adapters(
transformer_per_block = {"down": self.config.layers_per_block, "up": self.config.layers_per_block + 1}
weights = [
- UNet2DConditionLoadersMixin._expand_lora_weight_dict(
- weight_for_adapter, blocks_with_transformer, transformer_per_block
- )
+ self._expand_lora_weight_dict(weight_for_adapter, blocks_with_transformer, transformer_per_block)
for weight_for_adapter in weights
]
diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py
index 3acda5a2ab6a..affb8256be3d 100644
--- a/src/diffusers/utils/peft_utils.py
+++ b/src/diffusers/utils/peft_utils.py
@@ -230,7 +230,6 @@ def set_weights_and_activate_adapters(model, adapter_names, weights):
def get_module_weight(weight_for_adapter, module_name):
for layer_name, weight_ in weight_for_adapter.items():
if layer_name in module_name:
- print(f"Setting weight to {weight_} as {layer_name} in {module_name}")
return weight_
raise RuntimeError(f"No LoRA weight found for module {module_name}, which should never happen.")
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 67d28fe19e7e..4c741fde898c 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -19,6 +19,7 @@
import tempfile
import time
import unittest
+from itertools import product
import numpy as np
import torch
@@ -817,6 +818,212 @@ def test_simple_inference_with_text_unet_multi_adapter(self):
"output with no lora and output with lora disabled should give same results",
)
+ def test_simple_inference_with_text_unet_block_scale(self):
+ """
+ Tests a simple inference with lora attached to text encoder and unet, attaches
+        one adapter and sets different weights for different blocks (i.e. block lora)
+ """
+ for scheduler_cls in [DDIMScheduler, LCMScheduler]:
+ components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
+ pipe = self.pipeline_class(**components)
+ pipe = pipe.to(self.torch_device)
+ pipe.set_progress_bar_config(disable=None)
+ _, _, inputs = self.get_dummy_inputs(with_generator=False)
+
+ output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
+ pipe.unet.add_adapter(unet_lora_config, "adapter-1")
+
+ self.assertTrue(
+ self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
+ )
+ self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
+
+ if self.has_two_text_encoders:
+ pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
+ self.assertTrue(
+ self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
+ )
+
+ # todo umer: check all possible inputs options
+
+ weights_1 = {
+ "down": 5,
+ }
+ pipe.set_adapters("adapter-1", weights_1)
+ output_weights_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ weights_2 = {
+ "up": 5,
+ }
+ pipe.set_adapters("adapter-1", weights_2)
+ output_weights_2 = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ self.assertFalse(
+ np.allclose(output_weights_1, output_weights_2, atol=1e-3, rtol=1e-3),
+ "LoRA weights 1 and 2 should give different results",
+ )
+ self.assertFalse(
+ np.allclose(output_no_lora, output_weights_1, atol=1e-3, rtol=1e-3),
+ "No adapter and LoRA weights 1 should give different results",
+ )
+ self.assertFalse(
+ np.allclose(output_no_lora, output_weights_2, atol=1e-3, rtol=1e-3),
+ "No adapter and LoRA weights 2 should give different results",
+ )
+
+ pipe.disable_lora()
+ output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ self.assertTrue(
+ np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3),
+ "output with no lora and output with lora disabled should give same results",
+ )
+
+ def test_simple_inference_with_text_unet_multi_adapter_block_lora(self):
+ """
+ Tests a simple inference with lora attached to text encoder and unet, attaches
+        multiple adapters and sets different weights for different blocks (i.e. block lora)
+ """
+ for scheduler_cls in [DDIMScheduler, LCMScheduler]:
+ components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
+ pipe = self.pipeline_class(**components)
+ pipe = pipe.to(self.torch_device)
+ pipe.set_progress_bar_config(disable=None)
+ _, _, inputs = self.get_dummy_inputs(with_generator=False)
+
+ output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
+ pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
+
+ pipe.unet.add_adapter(unet_lora_config, "adapter-1")
+ pipe.unet.add_adapter(unet_lora_config, "adapter-2")
+
+ self.assertTrue(
+ self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
+ )
+ self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
+
+ if self.has_two_text_encoders:
+ pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
+ pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
+ self.assertTrue(
+ self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
+ )
+
+ scales_1 = {"down": 5}
+ scales_2 = {"down": 5, "mid": 5}
+
+ pipe.set_adapters("adapter-1", scales_1)
+
+ output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ pipe.set_adapters("adapter-2", scales_2)
+ output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ pipe.set_adapters(["adapter-1", "adapter-2"], [scales_1, scales_2])
+
+ output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+            # Different block-scale configurations should give different results
+ self.assertFalse(
+ np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3),
+ "Adapter 1 and 2 should give different results",
+ )
+
+ self.assertFalse(
+ np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3),
+ "Adapter 1 and mixed adapters should give different results",
+ )
+
+ self.assertFalse(
+ np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3),
+ "Adapter 2 and mixed adapters should give different results",
+ )
+
+ pipe.disable_lora()
+
+ output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ self.assertTrue(
+ np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3),
+ "output with no lora and output with lora disabled should give same results",
+ )
+
+ def test_simple_inference_with_text_unet_block_scale_for_all_dict_options(self):
+        """Tests that any valid combination of lora block scales can be used in pipe.set_adapters"""
+
+ def updown_options(blocks_with_tf, layers_per_block, value):
+ """
+            Generate every possible form a lora weight dict for the up/down part can take.
+ E.g. 2, {"block_1": 2}, {"block_1": [2,2,2]}, {"block_1": 2, "block_2": [2,2,2]}, ...
+ """
+ num_val = value
+ list_val = [value] * layers_per_block
+
+ node_opts = [None, num_val, list_val]
+ node_opts_foreach_block = [node_opts] * len(blocks_with_tf)
+
+ updown_opts = [num_val]
+ for nodes in product(*node_opts_foreach_block):
+ if all(n is None for n in nodes):
+ continue
+ opt = {}
+ for b, n in zip(blocks_with_tf, nodes):
+ if n is not None:
+ opt["block_" + str(b)] = n
+ updown_opts.append(opt)
+ return updown_opts
+
+ def all_possible_dict_opts(unet, value):
+ """
+            Generate every possible form a lora weight dict can take.
+ E.g. 2, {"down": 2}, {"down": [2,2,2]}, {"mid": 2, "up": [2,2,2]}, ...
+ """
+
+ down_blocks_with_tf = [i for i, d in enumerate(unet.down_blocks) if hasattr(d, "attentions")]
+ up_blocks_with_tf = [i for i, u in enumerate(unet.up_blocks) if hasattr(u, "attentions")]
+
+ layers_per_block = unet.config.layers_per_block
+
+ mid_opts = [None, value]
+ down_opts = [None] + updown_options(down_blocks_with_tf, layers_per_block, value)
+ up_opts = [None] + updown_options(up_blocks_with_tf, layers_per_block + 1, value)
+
+ opts = []
+
+ for d, m, u in product(down_opts, mid_opts, up_opts):
+ if all(o is None for o in (d, m, u)):
+ continue
+ opt = {}
+ if d is not None:
+ opt["down"] = d
+ if m is not None:
+ opt["mid"] = m
+ if u is not None:
+ opt["up"] = u
+ opts.append(opt)
+
+ return opts
+
+ components, text_lora_config, unet_lora_config = self.get_dummy_components(self.scheduler_cls)
+ pipe = self.pipeline_class(**components)
+ pipe = pipe.to(self.torch_device)
+ pipe.set_progress_bar_config(disable=None)
+ _, _, inputs = self.get_dummy_inputs(with_generator=False)
+
+ pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
+ pipe.unet.add_adapter(unet_lora_config, "adapter-1")
+
+ if self.has_two_text_encoders:
+ pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
+
+ for scale_dict in all_possible_dict_opts(pipe.unet, value=1234):
+ # test if lora block scales can be set with this scale_dict
+ pipe.set_adapters("adapter-1", scale_dict)
+
def test_simple_inference_with_text_unet_multi_adapter_delete_adapter(self):
"""
Tests a simple inference with lora attached to text encoder and unet, attaches
From 7405aff7c989af20f8f4c1b816d6a1f959188508 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Sat, 16 Mar 2024 10:58:09 +0100
Subject: [PATCH 03/25] Finishing up
---
Pipfile | 11 -----------
docs/source/en/using-diffusers/loading_adapters.md | 2 +-
src/diffusers/loaders/lora.py | 8 ++++++++
src/diffusers/loaders/unet.py | 2 +-
4 files changed, 10 insertions(+), 13 deletions(-)
delete mode 100644 Pipfile
diff --git a/Pipfile b/Pipfile
deleted file mode 100644
index 0757494bb360..000000000000
--- a/Pipfile
+++ /dev/null
@@ -1,11 +0,0 @@
-[[source]]
-url = "https://pypi.org/simple"
-verify_ssl = true
-name = "pypi"
-
-[packages]
-
-[dev-packages]
-
-[requires]
-python_version = "3.11"
diff --git a/docs/source/en/using-diffusers/loading_adapters.md b/docs/source/en/using-diffusers/loading_adapters.md
index 3b9231ec0c60..4bff96b98d60 100644
--- a/docs/source/en/using-diffusers/loading_adapters.md
+++ b/docs/source/en/using-diffusers/loading_adapters.md
@@ -170,7 +170,7 @@ scales = {
"block_1": [0.4, 0.8, 1.0], # the 3 transformers in the 1st block in the up-part will use scales 0.4m 0.8 and 1.0 respectively
}
}
-pipe.load_lora_weights("my_adapter", scales)
+pipe.load_lora_weights("my_adapter", scales)
```
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index f16ca5deee2d..8a6279b28bec 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -1049,6 +1049,14 @@ def set_adapters(
unet_weights, text_encoder_weights = [], []
for weights in adapter_weights:
if isinstance(weights, dict):
+ valid_keys = ["text_encoder", "down", "mid", "up"]
+
+ for k in weights.keys():
+ if k not in valid_keys:
+ raise ValueError(
+ f"Got invalid key '{k}' in lora weight dict. Allowed keys are 'text_encoder', 'down', 'mid', 'up'."
+ )
+
unet_weight = {k: v for k, v in weights.items() if k != "text_encoder"}
if len(unet_weight) == 0:
unet_weight = None
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index 841760dce45a..30ee3edbef25 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -708,7 +708,7 @@ def set_adapters(
transformer_per_block = {"down": self.config.layers_per_block, "up": self.config.layers_per_block + 1}
weights = [
- self._expand_lora_weight_dict(weight_for_adapter, blocks_with_transformer, transformer_per_block)
+ self._expand_lora_scales_dict(weight_for_adapter, blocks_with_transformer, transformer_per_block)
for weight_for_adapter in weights
]
From 5c19f186836b5a56ee1120e7673c23d01c70ee73 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Sat, 16 Mar 2024 11:01:11 +0100
Subject: [PATCH 04/25] Reverted unrelated changes made by make style
---
docs/source/en/_toctree.yml | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 29bd65fb4dba..15f4f460458e 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -400,14 +400,14 @@
title: DPMSolverSDEScheduler
- local: api/schedulers/singlestep_dpm_solver
title: DPMSolverSinglestepScheduler
- - local: api/schedulers/edm_multistep_dpm_solver
- title: EDMDPMSolverMultistepScheduler
- - local: api/schedulers/edm_euler
- title: EDMEulerScheduler
- local: api/schedulers/euler_ancestral
title: EulerAncestralDiscreteScheduler
- local: api/schedulers/euler
title: EulerDiscreteScheduler
+ - local: api/schedulers/edm_euler
+ title: EDMEulerScheduler
+ - local: api/schedulers/edm_multistep_dpm_solver
+ title: EDMDPMSolverMultistepScheduler
- local: api/schedulers/heun
title: HeunDiscreteScheduler
- local: api/schedulers/ipndm
From 8908c903bc1f8e095171cfe5796c76424f50b4e9 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Sat, 16 Mar 2024 11:09:09 +0100
Subject: [PATCH 05/25] Fixed typo
---
docs/source/en/using-diffusers/loading_adapters.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/source/en/using-diffusers/loading_adapters.md b/docs/source/en/using-diffusers/loading_adapters.md
index 4bff96b98d60..85dbeffb308a 100644
--- a/docs/source/en/using-diffusers/loading_adapters.md
+++ b/docs/source/en/using-diffusers/loading_adapters.md
@@ -167,7 +167,7 @@ scales = {
# "mid" # because "mid" is not given, all transformers in the mid part will use the default scale 1.0
"up": {
"block_0": 0.6, # all 3 transformers in the 0th block in the up-part will use scale 0.6
- "block_1": [0.4, 0.8, 1.0], # the 3 transformers in the 1st block in the up-part will use scales 0.4m 0.8 and 1.0 respectively
+ "block_1": [0.4, 0.8, 1.0], # the 3 transformers in the 1st block in the up-part will use scales 0.4, 0.8 and 1.0 respectively
}
}
pipe.load_lora_weights("my_adapter", scales)
From 7e6ce831dd674925da741b44effebf8af7e511a5 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Sat, 16 Mar 2024 20:16:02 +0100
Subject: [PATCH 06/25] Fixed bug + Made text_encoder_2 scalable
---
.../en/using-diffusers/loading_adapters.md | 1 +
src/diffusers/loaders/lora.py | 73 +++++++++++++------
src/diffusers/loaders/unet.py | 2 +
tests/lora/test_lora_layers_peft.py | 13 +++-
4 files changed, 63 insertions(+), 26 deletions(-)
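
The normalization added here (`ensure_is_dict`) maps a plain float or `None` onto every scalable component. A standalone sketch of that behaviour, simplified from the patch:

```python
# Standalone sketch, simplified from this patch; key names match the patch code.
allowed_keys = ["text_encoder", "text_encoder_2", "down", "mid", "up"]

def ensure_is_dict(weight):
    if isinstance(weight, dict):
        return weight
    if isinstance(weight, (int, float)):
        return {key: weight for key in allowed_keys}   # same scale everywhere
    if weight is None:
        return {key: 1.0 for key in allowed_keys}      # fall back to the default
    raise RuntimeError(f"lora weight has wrong type {type(weight)}.")

print(ensure_is_dict(0.8))   # every component gets 0.8
print(ensure_is_dict(None))  # every component falls back to 1.0
```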
diff --git a/docs/source/en/using-diffusers/loading_adapters.md b/docs/source/en/using-diffusers/loading_adapters.md
index 85dbeffb308a..e7ca44aefa3e 100644
--- a/docs/source/en/using-diffusers/loading_adapters.md
+++ b/docs/source/en/using-diffusers/loading_adapters.md
@@ -163,6 +163,7 @@ pipe = ... # create pipeline
pipe.load_lora_weights(..., adapter_name="my_adapter")
scales = {
"text_encoder": 0.5,
+ "text_encoder_2": 0.5, # only usable if pipe has a 2nd text encoder
"down": 0.9, # all transformers in the down-part will use scale 0.9
# "mid" # because "mid" is not given, all transformers in the mid part will use the default scale 1.0
"up": {
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index 8a6279b28bec..8b002d409126 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -1040,34 +1040,59 @@ def set_adapters(
):
adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
- if adapter_weights is None:
- unet_weights, text_encoder_weights = None, None
- else:
- if isinstance(adapter_weights, (float, dict)):
- adapter_weights = [adapter_weights] * len(adapter_names)
+ number = (float, int)
+ has_2nd_text_encoder = hasattr(self, "text_encoder_2")
+
+ # Expand weights into a list, one entry per adapter
+ if adapter_weights is None or isinstance(adapter_weights, (number, dict)):
+ adapter_weights = [adapter_weights] * len(adapter_names)
+
+ if len(adapter_names) != len(adapter_weights):
+ raise ValueError(
+ f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(adapter_weights)}"
+ )
+
+ # Normalize into dicts
+ allowed_keys = ["text_encoder", "down", "mid", "up"]
+ if has_2nd_text_encoder:
+ allowed_keys.append("text_encoder_2")
+
+ def ensure_is_dict(weight):
+ if isinstance(weight, dict):
+ return weight
+ elif isinstance(weight, number):
+ return {key: weight for key in allowed_keys}
+ elif weight is None:
+ return {key: 1.0 for key in allowed_keys}
+ else:
+ raise RuntimeError(f"lora weight has wrong type {type(weight)}.")
- unet_weights, text_encoder_weights = [], []
- for weights in adapter_weights:
- if isinstance(weights, dict):
- valid_keys = ["text_encoder", "down", "mid", "up"]
+ adapter_weights = [ensure_is_dict(weight) for weight in adapter_weights]
- for k in weights.keys():
- if k not in valid_keys:
- raise ValueError(
- f"Got invalid key '{k}' in lora weight dict. Allowed keys are 'text_encoder', 'down', 'mid', 'up'."
- )
+ for weights in adapter_weights:
+ for k in weights.keys():
+ if k not in allowed_keys:
+ raise ValueError(
+ f"Got invalid key '{k}' in lora weight dict. Allowed keys are 'text_encoder', 'text_encoder_2', 'down', 'mid', 'up'."
+ )
- unet_weight = {k: v for k, v in weights.items() if k != "text_encoder"}
- if len(unet_weight) == 0:
- unet_weight = None
- text_encoder_weight = weights.get("text_encoder", None)
+ # Decompose weights into weights for unet, text_encoder and text_encoder_2
+ unet_weights, text_encoder_weights = [], []
+ if has_2nd_text_encoder:
+ text_encoder_2_weights = []
- unet_weights.append(unet_weight)
- text_encoder_weights.append(text_encoder_weight)
+ for weights in adapter_weights:
+ unet_weight = {k: v for k, v in weights.items() if "text_encoder" not in k}
+ if len(unet_weight) == 0:
+ unet_weight = None
+ text_encoder_weight = weights.get("text_encoder", None)
+ if has_2nd_text_encoder:
+ text_encoder_2_weight = weights.get("text_encoder_2", None)
- else:
- unet_weights.append(weights)
- text_encoder_weights.append(weights)
+ unet_weights.append(unet_weight)
+ text_encoder_weights.append(text_encoder_weight)
+ if has_2nd_text_encoder:
+ text_encoder_2_weights.append(text_encoder_2_weight)
unet = getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet
# Handle the UNET
@@ -1077,7 +1102,7 @@ def set_adapters(
if hasattr(self, "text_encoder"):
self.set_adapters_for_text_encoder(adapter_names, self.text_encoder, text_encoder_weights)
if hasattr(self, "text_encoder_2"):
- self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2, text_encoder_weights)
+ self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2, text_encoder_2_weights)
def disable_lora(self):
if not USE_PEFT_BACKEND:
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index 30ee3edbef25..16d4cf7bc861 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -701,6 +701,8 @@ def set_adapters(
f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}."
)
+ # Set missing value to default of 1.0
+ weights = [weight or 1.0 for weight in weights]
blocks_with_transformer = {
"down": [i for i, block in enumerate(self.down_blocks) if hasattr(block, "attentions")],
"up": [i for i, block in enumerate(self.up_blocks) if hasattr(block, "attentions")],
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 4c741fde898c..31a7180018ce 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -988,16 +988,22 @@ def all_possible_dict_opts(unet, value):
layers_per_block = unet.config.layers_per_block
+ text_encoder_opts = [None, value]
+ text_encoder_2_opts = [None, value]
mid_opts = [None, value]
down_opts = [None] + updown_options(down_blocks_with_tf, layers_per_block, value)
up_opts = [None] + updown_options(up_blocks_with_tf, layers_per_block + 1, value)
opts = []
- for d, m, u in product(down_opts, mid_opts, up_opts):
- if all(o is None for o in (d, m, u)):
+ for t1, t2, d, m, u in product(text_encoder_opts, text_encoder_2_opts, down_opts, mid_opts, up_opts):
+ if all(o is None for o in (t1, t2, d, m, u)):
continue
opt = {}
+ if t1 is not None:
+ opt["text_encoder"] = t1
+ if t2 is not None:
+ opt["text_encoder_2"] = t2
if d is not None:
opt["down"] = d
if m is not None:
@@ -1022,6 +1028,9 @@ def all_possible_dict_opts(unet, value):
for scale_dict in all_possible_dict_opts(pipe.unet, value=1234):
# test if lora block scales can be set with this scale_dict
+ if not self.has_two_text_encoders and "text_encoder_2" in scale_dict:
+ del scale_dict["text_encoder_2"]
+
pipe.set_adapters("adapter-1", scale_dict)
def test_simple_inference_with_text_unet_multi_adapter_delete_adapter(self):
From 3c841fc0a49889e9038cf74e6b921af67dce2627 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Mon, 18 Mar 2024 12:25:02 +0100
Subject: [PATCH 07/25] Integrated some review feedback
---
src/diffusers/loaders/lora.py | 24 ++++++++++----------
src/diffusers/loaders/unet.py | 12 +++++-----
tests/lora/test_lora_layers_peft.py | 34 ++++++++++++++++++++---------
3 files changed, 41 insertions(+), 29 deletions(-)
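
From this commit on, the user-facing dict is keyed by pipeline component (`unet`, `text_encoder`, `text_encoder_2`) and decomposed before being forwarded. A simplified standalone sketch of that decomposition (ignoring the second-text-encoder guard in the actual patch):

```python
# Simplified sketch of the per-component split performed in set_adapters.
adapter_weights = [{"text_encoder": 0.5, "unet": {"down": 0.9, "up": {"block_0": 0.6}}}]

unet_weights, text_encoder_weights, text_encoder_2_weights = [], [], []
for weights in adapter_weights:
    if isinstance(weights, dict):
        unet_weights.append(weights.get("unet"))
        text_encoder_weights.append(weights.get("text_encoder"))
        text_encoder_2_weights.append(weights.get("text_encoder_2"))
    else:  # a plain float (or None) applies to every component
        unet_weights.append(weights)
        text_encoder_weights.append(weights)
        text_encoder_2_weights.append(weights)

print(unet_weights)            # [{'down': 0.9, 'up': {'block_0': 0.6}}]
print(text_encoder_weights)    # [0.5]
print(text_encoder_2_weights)  # [None]
```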
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index 8b002d409126..6e757b924469 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -1036,15 +1036,15 @@ def enable_lora_for_text_encoder(self, text_encoder: Optional["PreTrainedModel"]
def set_adapters(
self,
adapter_names: Union[List[str], str],
- adapter_weights: Optional[Union[List[float], List[Dict]]] = None,
+ adapter_weights: Optional[Union[List[float], float, List[Dict], Dict]] = None,
):
adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
- number = (float, int)
- has_2nd_text_encoder = hasattr(self, "text_encoder_2")
+ allowed_numeric_dtypes = (float, int)
+ has_second_text_encoder = hasattr(self, "text_encoder_2")
# Expand weights into a list, one entry per adapter
- if adapter_weights is None or isinstance(adapter_weights, (number, dict)):
+ if adapter_weights is None or isinstance(adapter_weights, (allowed_numeric_dtypes, dict)):
adapter_weights = [adapter_weights] * len(adapter_names)
if len(adapter_names) != len(adapter_weights):
@@ -1053,14 +1053,14 @@ def set_adapters(
)
# Normalize into dicts
- allowed_keys = ["text_encoder", "down", "mid", "up"]
- if has_2nd_text_encoder:
+ allowed_keys = ["text_encoder", "unet"]
+ if has_second_text_encoder:
allowed_keys.append("text_encoder_2")
def ensure_is_dict(weight):
if isinstance(weight, dict):
return weight
- elif isinstance(weight, number):
+ elif isinstance(weight, allowed_numeric_dtypes):
return {key: weight for key in allowed_keys}
elif weight is None:
return {key: 1.0 for key in allowed_keys}
@@ -1078,20 +1078,18 @@ def ensure_is_dict(weight):
# Decompose weights into weights for unet, text_encoder and text_encoder_2
unet_weights, text_encoder_weights = [], []
- if has_2nd_text_encoder:
+ if has_second_text_encoder:
text_encoder_2_weights = []
for weights in adapter_weights:
- unet_weight = {k: v for k, v in weights.items() if "text_encoder" not in k}
- if len(unet_weight) == 0:
- unet_weight = None
+ unet_weight = weights.get("unet", None)
text_encoder_weight = weights.get("text_encoder", None)
- if has_2nd_text_encoder:
+ if has_second_text_encoder:
text_encoder_2_weight = weights.get("text_encoder_2", None)
unet_weights.append(unet_weight)
text_encoder_weights.append(text_encoder_weight)
- if has_2nd_text_encoder:
+ if has_second_text_encoder:
text_encoder_2_weights.append(text_encoder_2_weight)
unet = getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index 16d4cf7bc861..f9bcd546c24b 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -565,7 +565,7 @@ def _expand_lora_scales_dict(
self, scales, blocks_with_transformer: Dict[str, int], transformer_per_block: Dict[str, int]
):
"""
- Expand input into a weight dict with a weight per transformer
+ Expands the inputs into a more granular dictionary. See the example below for more details.
Parameters:
blocks_with_transformer (`Dict[str, int]`):
@@ -597,7 +597,7 @@ def _expand_lora_scales_dict(
'up.block_1.2': 7,
}
"""
- number = (float, int)
+ allowed_numeric_dtypes = (float, int)
if sorted(blocks_with_transformer.keys()) != ["down", "up"]:
raise ValueError("blocks_with_transformer needs to be a dict with keys `'down' and `'up'`")
@@ -605,7 +605,7 @@ def _expand_lora_scales_dict(
if sorted(transformer_per_block.keys()) != ["down", "up"]:
raise ValueError("transformer_per_block needs to be a dict with keys `'down' and `'up'`")
- if isinstance(scales, number):
+ if isinstance(scales, allowed_numeric_dtypes):
scales = {o: scales for o in ["down", "mid", "up"]}
if "mid" not in scales:
@@ -616,13 +616,13 @@ def _expand_lora_scales_dict(
scales[updown] = 1
# eg {"down": 1} to {"down": {"block_1": 1, "block_2": 1}}}
- if isinstance(scales[updown], number):
+ if isinstance(scales[updown], allowed_numeric_dtypes):
scales[updown] = {f"block_{i}": scales[updown] for i in blocks_with_transformer[updown]}
# eg {"down": "block_1": 1}} to {"down": "block_1": [1, 1]}}
for i in blocks_with_transformer[updown]:
block = f"block_{i}"
- if isinstance(scales[updown][block], number):
+ if isinstance(scales[updown][block], allowed_numeric_dtypes):
scales[updown][block] = [scales[updown][block] for _ in range(transformer_per_block[updown])]
# eg {"down": "block_1": [1, 1]}} to {"down.block_1.0": 1, "down.block_1.1": 1}
@@ -658,7 +658,7 @@ def layer_name(name):
def set_adapters(
self,
adapter_names: Union[List[str], str],
- weights: Optional[Union[List[float], float]] = None,
+ weights: Optional[Union[List[float], float, List[Dict], Dict]] = None,
):
"""
Set the currently active adapters for use in the UNet.
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 31a7180018ce..7e640ccfb9a4 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -846,16 +846,18 @@ def test_simple_inference_with_text_unet_block_scale(self):
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
)
- # todo umer: check all possible inputs options
-
weights_1 = {
- "down": 5,
+ "unet" : {
+ "down": 5,
+ }
}
pipe.set_adapters("adapter-1", weights_1)
output_weights_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
weights_2 = {
- "up": 5,
+ "unet" : {
+ "up": 5,
+ }
}
pipe.set_adapters("adapter-1", weights_2)
output_weights_2 = pipe(**inputs, generator=torch.manual_seed(0)).images
@@ -913,9 +915,17 @@ def test_simple_inference_with_text_unet_multi_adapter_block_lora(self):
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
)
- scales_1 = {"down": 5}
- scales_2 = {"down": 5, "mid": 5}
-
+ scales_1 = {
+ "unet" : {
+ "down": 5
+ }
+ }
+ scales_2 = {
+ "unet" : {
+ "down": 5,
+ "mid": 5
+ }
+ }
pipe.set_adapters("adapter-1", scales_1)
output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
@@ -1004,12 +1014,16 @@ def all_possible_dict_opts(unet, value):
opt["text_encoder"] = t1
if t2 is not None:
opt["text_encoder_2"] = t2
+ if all(o is None for o in (d, m, u)):
+ # no unet scaling
+ continue
+ opt["unet"] = {}
if d is not None:
- opt["down"] = d
+ opt["unet"]["down"] = d
if m is not None:
- opt["mid"] = m
+ opt["unet"]["mid"] = m
if u is not None:
- opt["up"] = u
+ opt["unet"]["up"] = u
opts.append(opt)
return opts
From 72b8752c915fe8a6ba50fdc2a3ae8aa0f46c91d4 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Tue, 19 Mar 2024 13:28:27 +0100
Subject: [PATCH 08/25] Incorporated review feedback
---
.../en/using-diffusers/loading_adapters.md | 31 +++++----
src/diffusers/loaders/lora.py | 69 +++++++------------
src/diffusers/loaders/unet.py | 41 ++++++-----
3 files changed, 65 insertions(+), 76 deletions(-)
diff --git a/docs/source/en/using-diffusers/loading_adapters.md b/docs/source/en/using-diffusers/loading_adapters.md
index e7ca44aefa3e..78469519fcd7 100644
--- a/docs/source/en/using-diffusers/loading_adapters.md
+++ b/docs/source/en/using-diffusers/loading_adapters.md
@@ -153,32 +153,33 @@ image
-
+To unload the LoRA weights, use the [`~loaders.LoraLoaderMixin.unload_lora_weights`] method to discard the LoRA weights and restore the model to its original weights:
+
+```py
+pipeline.unload_lora_weights()
+```
+
+### Adjust LoRA weight scale
For both [`~loaders.LoraLoaderMixin.load_lora_weights`] and [`~loaders.UNet2DConditionLoadersMixin.load_attn_procs`], you can pass the `cross_attention_kwargs={"scale": 0.5}` parameter to adjust how much of the LoRA weights to use. A value of `0` is the same as only using the base model weights, and a value of `1` is equivalent to using the fully finetuned LoRA.
-For fine-grained control on how much of the LoRA weights are used, use [`~loaders.LoraLoaderMixin.set_adapters`]. Here, you can define scale of any granularity up to per-transformer.
+For more granular control over the amount of LoRA weights used per layer, you can use [`~loaders.LoraLoaderMixin.set_adapters`] and pass a dictionary specifying how much to scale the weights in each layer by.
```python
pipe = ... # create pipeline
pipe.load_lora_weights(..., adapter_name="my_adapter")
scales = {
"text_encoder": 0.5,
"text_encoder_2": 0.5, # only usable if pipe has a 2nd text encoder
- "down": 0.9, # all transformers in the down-part will use scale 0.9
- # "mid" # because "mid" is not given, all transformers in the mid part will use the default scale 1.0
- "up": {
- "block_0": 0.6, # all 3 transformers in the 0th block in the up-part will use scale 0.6
- "block_1": [0.4, 0.8, 1.0], # the 3 transformers in the 1st block in the up-part will use scales 0.4, 0.8 and 1.0 respectively
+ "unet": {
+ "down": 0.9, # all transformers in the down-part will use scale 0.9
+ # "mid" # in this example "mid" is not given, therefore all transformers in the mid part will use the default scale 1.0
+ "up": {
+ "block_0": 0.6, # all 3 transformers in the 0th block in the up-part will use scale 0.6
+ "block_1": [0.4, 0.8, 1.0], # the 3 transformers in the 1st block in the up-part will use scales 0.4, 0.8 and 1.0 respectively
+ }
}
}
-pipe.load_lora_weights("my_adapter", scales)
-```
-
-
-To unload the LoRA weights, use the [`~loaders.LoraLoaderMixin.unload_lora_weights`] method to discard the LoRA weights and restore the model to its original weights:
-
-```py
-pipeline.unload_lora_weights()
+pipe.set_adapters("my_adapter", scales)
```
### Kohya and TheLastBen
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index 6e757b924469..06ee0a76676b 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -14,6 +14,7 @@
import inspect
import os
from pathlib import Path
+from types import NoneType
from typing import Callable, Dict, List, Optional, Union
import safetensors
@@ -959,7 +960,7 @@ def set_adapters_for_text_encoder(
self,
adapter_names: Union[List[str], str],
text_encoder: Optional["PreTrainedModel"] = None, # noqa: F821
- text_encoder_weights: Optional[Union[float, List[float]]] = None,
+ text_encoder_weights: Optional[Union[float, List[float], List[NoneType]]] = None,
):
"""
Sets the adapter layers for the text encoder.
@@ -977,17 +978,16 @@ def set_adapters_for_text_encoder(
raise ValueError("PEFT backend is required for this method.")
def process_weights(adapter_names, weights):
- if weights is None:
- weights = [1.0] * len(adapter_names)
- elif isinstance(weights, float):
- weights = [weights]
+ if not isinstance(weights, list):
+ weights = [weights] * len(adapter_names)
if len(adapter_names) != len(weights):
raise ValueError(
f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(weights)}"
)
- weights = [{"text_model": w} if w is not None else {"text_model": 1.0} for w in weights]
+ weights = [w or 1.0 for w in weights] # Set None values to default of 1.0
+ weights = [{"text_model": w} for w in weights]
return weights
@@ -1036,15 +1036,13 @@ def enable_lora_for_text_encoder(self, text_encoder: Optional["PreTrainedModel"]
def set_adapters(
self,
adapter_names: Union[List[str], str],
- adapter_weights: Optional[Union[List[float], float, List[Dict], Dict]] = None,
+ adapter_weights: Optional[Union[float, Dict, List[float], List[Dict]]] = None,
):
- adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
- allowed_numeric_dtypes = (float, int)
- has_second_text_encoder = hasattr(self, "text_encoder_2")
+ adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
# Expand weights into a list, one entry per adapter
- if adapter_weights is None or isinstance(adapter_weights, (allowed_numeric_dtypes, dict)):
+ if not isinstance(adapter_weights, list):
adapter_weights = [adapter_weights] * len(adapter_names)
if len(adapter_names) != len(adapter_weights):
@@ -1052,45 +1050,28 @@ def set_adapters(
f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(adapter_weights)}"
)
- # Normalize into dicts
- allowed_keys = ["text_encoder", "unet"]
- if has_second_text_encoder:
- allowed_keys.append("text_encoder_2")
-
- def ensure_is_dict(weight):
- if isinstance(weight, dict):
- return weight
- elif isinstance(weight, allowed_numeric_dtypes):
- return {key: weight for key in allowed_keys}
- elif weight is None:
- return {key: 1.0 for key in allowed_keys}
- else:
- raise RuntimeError(f"lora weight has wrong type {type(weight)}.")
-
- adapter_weights = [ensure_is_dict(weight) for weight in adapter_weights]
+ # Decompose weights into weights for unet, text_encoder and text_encoder_2
+ unet_weights, text_encoder_weights, text_encoder_2_weights = [], [], []
- for weights in adapter_weights:
- for k in weights.keys():
- if k not in allowed_keys:
- raise ValueError(
- f"Got invalid key '{k}' in lora weight dict. Allowed keys are 'text_encoder', 'text_encoder_2', 'down', 'mid', 'up'."
- )
+ for adapter_name, weights in zip(adapter_names, adapter_weights):
+ if isinstance(weights, dict):
+ unet_weight = weights.pop("unet", None)
+ text_encoder_weight = weights.pop("text_encoder", None)
+ text_encoder_2_weight = weights.pop("text_encoder_2", None)
- # Decompose weights into weights for unet, text_encoder and text_encoder_2
- unet_weights, text_encoder_weights = [], []
- if has_second_text_encoder:
- text_encoder_2_weights = []
+ if len(weights) >0:
+ raise ValueError(f"Got invalid key '{weights.keys()}' in lora weight dict for adapter {adapter_name}.")
- for weights in adapter_weights:
- unet_weight = weights.get("unet", None)
- text_encoder_weight = weights.get("text_encoder", None)
- if has_second_text_encoder:
- text_encoder_2_weight = weights.get("text_encoder_2", None)
+ if text_encoder_2_weight is not None and not hasattr(self, "text_encoder_2"):
+ logger.warning("Lora weight dict contains text_encoder_2 weights but will be ignored because pipeline does not have text_encoder_2.")
+ else:
+ unet_weight = weights
+ text_encoder_weight = weights
+ text_encoder_2_weight = weights
unet_weights.append(unet_weight)
text_encoder_weights.append(text_encoder_weight)
- if has_second_text_encoder:
- text_encoder_2_weights.append(text_encoder_2_weight)
+ text_encoder_2_weights.append(text_encoder_2_weight)
unet = getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet
# Handle the UNET
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index f9bcd546c24b..87a4d5a05f79 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -17,6 +17,7 @@
from contextlib import nullcontext
from functools import partial
from pathlib import Path
+from types import NoneType
from typing import Callable, Dict, List, Optional, Union
import safetensors
@@ -562,26 +563,36 @@ def _unfuse_lora_apply(self, module):
module.unmerge()
def _expand_lora_scales_dict(
- self, scales, blocks_with_transformer: Dict[str, int], transformer_per_block: Dict[str, int]
+ self, scales: Union[float, Dict], blocks_with_transformer: Dict[str, int], transformer_per_block: Dict[str, int]
):
"""
- Expands the inputs into a more granular dictionary. See the example below for more details.
+ Expands the inputs into a more granular dictionary. See the example below for more details.
Parameters:
+ scales (`Union[float, Dict]`):
+ Scales dict to expand.
blocks_with_transformer (`Dict[str, int]`):
Dict with keys 'up' and 'down', showing which blocks have transformer layers
transformer_per_block (`Dict[str, int]`):
Dict with keys 'up' and 'down', showing how many transformer layers each block has
E.g. turns
- {
+ scales = {
'down': 2,
'mid': 3,
'up': {
- 'block_1': 4,
- 'block_2': [5, 6, 7]
+ 'block_0': 4,
+ 'block_1': [5, 6, 7]
}
}
+ blocks_with_transformer = {
+ 'down': [1,2],
+ 'up': [0,1]
+ }
+ transformer_per_block = {
+ 'down': 2,
+ 'up': 3
+ }
into
{
'down.block_1.0': 2,
@@ -597,15 +608,13 @@ def _expand_lora_scales_dict(
'up.block_1.2': 7,
}
"""
- allowed_numeric_dtypes = (float, int)
-
if sorted(blocks_with_transformer.keys()) != ["down", "up"]:
raise ValueError("blocks_with_transformer needs to be a dict with keys `'down' and `'up'`")
if sorted(transformer_per_block.keys()) != ["down", "up"]:
raise ValueError("transformer_per_block needs to be a dict with keys `'down' and `'up'`")
- if isinstance(scales, allowed_numeric_dtypes):
+ if not isinstance(scales, dict):
scales = {o: scales for o in ["down", "mid", "up"]}
if "mid" not in scales:
@@ -616,13 +625,13 @@ def _expand_lora_scales_dict(
scales[updown] = 1
# eg {"down": 1} to {"down": {"block_1": 1, "block_2": 1}}}
- if isinstance(scales[updown], allowed_numeric_dtypes):
+ if not isinstance(scales[updown], dict):
scales[updown] = {f"block_{i}": scales[updown] for i in blocks_with_transformer[updown]}
# eg {"down": "block_1": 1}} to {"down": "block_1": [1, 1]}}
for i in blocks_with_transformer[updown]:
block = f"block_{i}"
- if isinstance(scales[updown][block], allowed_numeric_dtypes):
+ if not isinstance(scales[updown][block], dict):
scales[updown][block] = [scales[updown][block] for _ in range(transformer_per_block[updown])]
# eg {"down": "block_1": [1, 1]}} to {"down.block_1.0": 1, "down.block_1.1": 1}
@@ -650,7 +659,7 @@ def layer_name(name):
for layer in scales.keys():
if not any(layer_name(layer) in module for module in state_dict.keys()):
raise ValueError(
- f"Can't set lora scale for layer {layer}. It either doesn't exist in this unet or has not attentions."
+ f"Can't set lora scale for layer {layer}. It either doesn't exist in this unet or it has no attentions."
)
return {layer_name(name): weight for name, weight in scales.items()}
@@ -658,7 +667,7 @@ def layer_name(name):
def set_adapters(
self,
adapter_names: Union[List[str], str],
- weights: Optional[Union[List[float], float, List[Dict], Dict]] = None,
+ weights: Optional[Union[float, Dict, List[float], List[Dict], List[NoneType]]] = None,
):
"""
Set the currently active adapters for use in the UNet.
@@ -691,9 +700,8 @@ def set_adapters(
adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
- if weights is None:
- weights = [1.0] * len(adapter_names)
- elif isinstance(weights, (float, dict)):
+ # Expand weights into a list, one entry per adapter
+ if not isinstance(weights, list):
weights = [weights] * len(adapter_names)
if len(adapter_names) != len(weights):
@@ -701,8 +709,7 @@ def set_adapters(
f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}."
)
- # Set missing value to default of 1.0
- weights = [weight or 1.0 for weight in weights]
+ weights = [weight or 1.0 for weight in weights] # Set None values to default of 1.0
blocks_with_transformer = {
"down": [i for i, block in enumerate(self.down_blocks) if hasattr(block, "attentions")],
"up": [i for i, block in enumerate(self.up_blocks) if hasattr(block, "attentions")],
From 145c7f30719c29fbbb0f9cbd22a15287dd16f122 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Tue, 19 Mar 2024 14:15:32 +0100
Subject: [PATCH 09/25] Fix tests
---
src/diffusers/loaders/lora.py | 19 +++++++++++--------
src/diffusers/loaders/unet.py | 10 ++++++++--
tests/lora/test_lora_layers_peft.py | 19 +++++--------------
3 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index 06ee0a76676b..4d0ecd82d3a7 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -1038,7 +1038,6 @@ def set_adapters(
adapter_names: Union[List[str], str],
adapter_weights: Optional[Union[float, Dict, List[float], List[Dict]]] = None,
):
-
adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
# Expand weights into a list, one entry per adapter
@@ -1059,15 +1058,19 @@ def set_adapters(
text_encoder_weight = weights.pop("text_encoder", None)
text_encoder_2_weight = weights.pop("text_encoder_2", None)
- if len(weights) >0:
- raise ValueError(f"Got invalid key '{weights.keys()}' in lora weight dict for adapter {adapter_name}.")
+ if len(weights) > 0:
+ raise ValueError(
+ f"Got invalid key '{weights.keys()}' in lora weight dict for adapter {adapter_name}."
+ )
- if text_encoder_2_weight is not None and not hasattr(self, "text_encoder_2"):
- logger.warning("Lora weight dict contains text_encoder_2 weights but will be ignored because pipeline does not have text_encoder_2.")
+ if text_encoder_2_weight is not None and not hasattr(self, "text_encoder_2"):
+ logger.warning(
+ "Lora weight dict contains text_encoder_2 weights but will be ignored because pipeline does not have text_encoder_2."
+ )
else:
- unet_weight = weights
- text_encoder_weight = weights
- text_encoder_2_weight = weights
+ unet_weight = weights
+ text_encoder_weight = weights
+ text_encoder_2_weight = weights
unet_weights.append(unet_weight)
text_encoder_weights.append(text_encoder_weight)
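Based on the tests added later in this series, the per-adapter weight dict is expected to look like `{"unet": ..., "text_encoder": ..., "text_encoder_2": ...}`. The `"unet"` pop is not visible in this hunk, so the sketch below is an assumption about the surrounding code; `split_adapter_weights` is a hypothetical helper, not a diffusers API.

```python
# Illustrative sketch of the per-component split performed in lora.py's set_adapters.
from typing import Dict, Tuple, Union


def split_adapter_weights(weights: Union[float, Dict, None], adapter_name: str) -> Tuple:
    if isinstance(weights, dict):
        weights = dict(weights)  # avoid mutating the caller's dict
        unet_weight = weights.pop("unet", None)  # assumed to be popped in context not shown here
        text_encoder_weight = weights.pop("text_encoder", None)
        text_encoder_2_weight = weights.pop("text_encoder_2", None)
        if len(weights) > 0:
            raise ValueError(
                f"Got invalid key '{weights.keys()}' in lora weight dict for adapter {adapter_name}."
            )
    else:
        # A plain float (or None) applies to every component.
        unet_weight = text_encoder_weight = text_encoder_2_weight = weights
    return unet_weight, text_encoder_weight, text_encoder_2_weight


print(split_adapter_weights(0.8, "adapter-1"))
print(split_adapter_weights({"unet": {"down": 2}, "text_encoder": 0.5}, "adapter-1"))
```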
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index 87a4d5a05f79..2301ffd55838 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+import copy
import inspect
import os
from collections import defaultdict
@@ -563,7 +564,10 @@ def _unfuse_lora_apply(self, module):
module.unmerge()
def _expand_lora_scales_dict(
- self, scales: Union[float, Dict], blocks_with_transformer: Dict[str, int], transformer_per_block: Dict[str, int]
+ self,
+ scales: Union[float, Dict],
+ blocks_with_transformer: Dict[str, int],
+ transformer_per_block: Dict[str, int],
):
"""
Expands the inputs into a more granular dictionary. See the example below for more details.
@@ -614,6 +618,8 @@ def _expand_lora_scales_dict(
if sorted(transformer_per_block.keys()) != ["down", "up"]:
raise ValueError("transformer_per_block needs to be a dict with keys `'down' and `'up'`")
+ scales = copy.deepcopy(scales)
+
if not isinstance(scales, dict):
scales = {o: scales for o in ["down", "mid", "up"]}
@@ -631,7 +637,7 @@ def _expand_lora_scales_dict(
# eg {"down": "block_1": 1}} to {"down": "block_1": [1, 1]}}
for i in blocks_with_transformer[updown]:
block = f"block_{i}"
- if not isinstance(scales[updown][block], dict):
+ if not isinstance(scales[updown][block], list):
scales[updown][block] = [scales[updown][block] for _ in range(transformer_per_block[updown])]
# eg {"down": "block_1": [1, 1]}} to {"down.block_1.0": 1, "down.block_1.1": 1}
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 7e640ccfb9a4..fb9c938c7d5d 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -847,7 +847,7 @@ def test_simple_inference_with_text_unet_block_scale(self):
)
weights_1 = {
- "unet" : {
+ "unet": {
"down": 5,
}
}
@@ -855,7 +855,7 @@ def test_simple_inference_with_text_unet_block_scale(self):
output_weights_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
weights_2 = {
- "unet" : {
+ "unet": {
"up": 5,
}
}
@@ -915,17 +915,8 @@ def test_simple_inference_with_text_unet_multi_adapter_block_lora(self):
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
)
- scales_1 = {
- "unet" : {
- "down": 5
- }
- }
- scales_2 = {
- "unet" : {
- "down": 5,
- "mid": 5
- }
- }
+ scales_1 = {"unet": {"down": 5}}
+ scales_2 = {"unet": {"down": 5, "mid": 5}}
pipe.set_adapters("adapter-1", scales_1)
output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
@@ -990,7 +981,7 @@ def updown_options(blocks_with_tf, layers_per_block, value):
def all_possible_dict_opts(unet, value):
"""
Generate every possible combination for how a lora weight dict can be.
- E.g. 2, {"down": 2}, {"down": [2,2,2]}, {"mid": 2, "up": [2,2,2]}, ...
+ E.g. 2, {"unet: {"down": 2}}, {"unet: {"down": [2,2,2]}}, {"unet: {"mid": 2, "up": [2,2,2]}}, ...
"""
down_blocks_with_tf = [i for i, d in enumerate(unet.down_blocks) if hasattr(d, "attentions")]
From 83ff34bcb4413ad6fea61dd5a64b723fea649329 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Wed, 20 Mar 2024 11:07:01 +0100
Subject: [PATCH 10/25] Made every module configurable
---
.../en/using-diffusers/loading_adapters.md | 2 +-
src/diffusers/loaders/lora.py | 5 +-
src/diffusers/loaders/unet.py | 128 +++++++++---------
3 files changed, 71 insertions(+), 64 deletions(-)
diff --git a/docs/source/en/using-diffusers/loading_adapters.md b/docs/source/en/using-diffusers/loading_adapters.md
index 78469519fcd7..583e405e54f0 100644
--- a/docs/source/en/using-diffusers/loading_adapters.md
+++ b/docs/source/en/using-diffusers/loading_adapters.md
@@ -163,7 +163,7 @@ pipeline.unload_lora_weights()
For both [`~loaders.LoraLoaderMixin.load_lora_weights`] and [`~loaders.UNet2DConditionLoadersMixin.load_attn_procs`], you can pass the `cross_attention_kwargs={"scale": 0.5}` parameter to adjust how much of the LoRA weights to use. A value of `0` is the same as only using the base model weights, and a value of `1` is equivalent to using the fully finetuned LoRA.
-For more granular control on the amount of LoRA weights used per layer, you can use [`~loaders.LoraLoaderMixin.set_adapters`] and pass a dictionary specifying how much to scale the weights in each layer by.
+For more granular control on the amount of LoRA weights used per layer, you can use [`~loaders.LoraLoaderMixin.set_adapters`] and pass a dictionary specifying by how much to scale the weights in each layer.
```python
pipe = ... # create pipeline
pipe.load_lora_weights(..., adapter_name="my_adapter")
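A hedged usage sketch of the granular-scale dictionary this series introduces, continuing the style of the doc snippet above; the adapter name and every scale value are illustrative, and the accepted keys follow the tests added later in this series.

```python
# Illustrative only: granular per-block LoRA scales passed to set_adapters.
pipe = ...  # create pipeline
pipe.load_lora_weights(..., adapter_name="my_adapter")
scales = {
    "text_encoder": 0.5,                                     # scale for the text encoder LoRA
    "unet": {
        "down": 0.9,                                         # one scale for all down blocks
        "mid": 1.0,
        "up": {"block_0": 0.6, "block_1": [0.4, 0.8, 1.0]},  # per-block / per-transformer scales
    },
}
pipe.set_adapters("my_adapter", scales)
```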
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index 118a5325bf09..2e5e2ab5b7f6 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+import copy
import inspect
import os
from pathlib import Path
@@ -986,7 +987,7 @@ def process_weights(adapter_names, weights):
f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(weights)}"
)
- weights = [w or 1.0 for w in weights] # Set None values to default of 1.0
+ weights = [w if w is not None else 1.0 for w in weights] # Set None values to default of 1.0
weights = [{"text_model": w} for w in weights]
return weights
@@ -1040,6 +1041,8 @@ def set_adapters(
):
adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
+ adapter_weights = copy.deepcopy(adapter_weights)
+
# Expand weights into a list, one entry per adapter
if not isinstance(adapter_weights, list):
adapter_weights = [adapter_weights] * len(adapter_names)
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index 519714641c26..93ff61a41801 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -563,24 +563,15 @@ def _unfuse_lora_apply(self, module):
if isinstance(module, BaseTunerLayer):
module.unmerge()
- def _expand_lora_scales_dict(
- self,
- scales: Union[float, Dict],
- blocks_with_transformer: Dict[str, int],
- transformer_per_block: Dict[str, int],
- ):
+ def _expand_lora_scales_dict(self, scales: Union[float, Dict]):
"""
Expands the inputs into a more granular dictionary. See the example below for more details.
Parameters:
scales (`Union[float, Dict]`):
Scales dict to expand.
- blocks_with_transformer (`Dict[str, int]`):
- Dict with keys 'up' and 'down', showing which blocks have transformer layers
- transformer_per_block (`Dict[str, int]`):
- Dict with keys 'up' and 'down', showing how many transformer layers each block has
- E.g. turns
+ E.g. turns todo umer
scales = {
'down': 2,
'mid': 3,
@@ -589,14 +580,7 @@ def _expand_lora_scales_dict(
'block_1': [5, 6, 7]
}
}
- blocks_with_transformer = {
- 'down': [1,2],
- 'up': [0,1]
- }
- transformer_per_block = {
- 'down': 2,
- 'up': 3
- }
+
into
{
'down.block_1.0': 2,
@@ -612,63 +596,83 @@ def _expand_lora_scales_dict(
'up.block_1.2': 7,
}
"""
- if sorted(blocks_with_transformer.keys()) != ["down", "up"]:
- raise ValueError("blocks_with_transformer needs to be a dict with keys `'down' and `'up'`")
-
- if sorted(transformer_per_block.keys()) != ["down", "up"]:
- raise ValueError("transformer_per_block needs to be a dict with keys `'down' and `'up'`")
-
scales = copy.deepcopy(scales)
- if not isinstance(scales, dict):
- scales = {o: scales for o in ["down", "mid", "up"]}
+ def expand_and_set_defaults(inner_dict, keys):
+ if not isinstance(inner_dict, dict):
+ inner_dict = {o: inner_dict for o in keys}
+ for o in keys:
+ inner_dict.setdefault(o, 1.0)
+ return inner_dict
- if "mid" not in scales:
- scales["mid"] = 1
+ # top level
+ scales = expand_and_set_defaults(scales, ["down", "mid", "up"])
- for updown in ["up", "down"]:
- if updown not in scales:
- scales[updown] = 1
+ # down part, block level
+ keys = [f"block_{i}" for i in range(len(self.down_blocks))]
+ scales["down"] = expand_and_set_defaults(scales["down"], keys)
- # eg {"down": 1} to {"down": {"block_1": 1, "block_2": 1}}}
- if not isinstance(scales[updown], dict):
- scales[updown] = {f"block_{i}": scales[updown] for i in blocks_with_transformer[updown]}
+ # up part, block level
+ keys = [f"block_{i}" for i in range(len(self.up_blocks))]
+ scales["up"] = expand_and_set_defaults(scales["up"], keys)
- # eg {"down": "block_1": 1}} to {"down": "block_1": [1, 1]}}
- for i in blocks_with_transformer[updown]:
- block = f"block_{i}"
- if not isinstance(scales[updown][block], list):
- scales[updown][block] = [scales[updown][block] for _ in range(transformer_per_block[updown])]
+ # mid part, layer level
+ mid_resnets = len(self.mid_block.resnets)
+ keys = [f"resnets_{i}" for i in range(mid_resnets)] + [f"attention_{i}" for i in range(mid_resnets - 1)]
+ scales["mid"] = expand_and_set_defaults(scales["mid"], keys)
- # eg {"down": "block_1": [1, 1]}} to {"down.block_1.0": 1, "down.block_1.1": 1}
- for i in blocks_with_transformer[updown]:
- block = f"block_{i}"
- for tf_idx, value in enumerate(scales[updown][block]):
- scales[f"{updown}.{block}.{tf_idx}"] = value
+ # down/up part, layer level
+ for updown in ["up", "down"]:
+ blocks = self.down_blocks if updown == "down" else self.up_blocks
+ updownsampler_name = "downsampler" if updown == "down" else "upsampler"
+
+ for i, block in enumerate(blocks):
+ block_name = f"block_{i}"
+ n_res = len(block.resnets)
+ has_attns = hasattr(block, "attentions") and getattr(block, "attentions") is not None
+ has_updown = hasattr(block, updownsampler_name) and getattr(block, updownsampler_name) is not None
+
+ keys = [f"resnets_{i}" for i in range(n_res)]
+ if has_attns:
+ keys += [f"attention_{i}" for i in range(n_res)]
+ if has_updown:
+ keys += ["downsampler"] if updown == "down" else ["upsampler"]
+
+ scales[updown][block_name] = expand_and_set_defaults(scales[updown][block_name], keys)
+
+ def flatten_dict(d, parent_key="", sep="."):
+ items = []
+ for k, v in d.items():
+ new_key = f"{parent_key}{sep}{k}" if parent_key else k
+ if isinstance(v, dict):
+ items.extend(flatten_dict(v, new_key, sep=sep).items())
+ else:
+ items.append((new_key, v))
+ return dict(items)
- del scales[updown]
+ def module_name(name):
+ """Translate user-friendly name (e.g. 'down.block_0.resnet_0') into actual module name (e.g. 'down_blocks.0.resnets.0')"""
- def layer_name(name):
- """Translate user-friendly name (e.g. 'mid') into actual layer name (e.g. 'mid_block.attentions.0')"""
- if name == "mid":
- return "mid_block.attentions.0"
+ if "mid" in name:
+ upmiddown, module = name.split(".")
+ block = ""
+ else:
+ upmiddown, block, module = name.split(".")
- updown, block, attn = name.split(".")
+ upmiddown = upmiddown.replace("down", "down_blocks").replace("mid", "mid_block").replace("up", "up_blocks")
+ block = ("." + block.replace("block_", "")) if block != "" else ""
+ module = "." + module.replace("_", ".")
- updown = updown.replace("down", "down_blocks").replace("up", "up_blocks")
- block = block.replace("block_", "")
- attn = "attentions." + attn
+ return upmiddown + block + module
- return ".".join((updown, block, attn))
+ scales = flatten_dict(scales)
state_dict = self.state_dict()
- for layer in scales.keys():
- if not any(layer_name(layer) in module for module in state_dict.keys()):
- raise ValueError(
- f"Can't set lora scale for layer {layer}. It either doesn't exist in this unet or it has no attentions."
- )
+ for module in scales.keys():
+ if not any(module_name(module) in module_ for module_ in state_dict.keys()):
+ raise ValueError(f"Can't set lora scale for module {module}. It doesn't exist in this unet.")
- return {layer_name(name): weight for name, weight in scales.items()}
+ return {module_name(k): v for k, v in flatten_dict(scales).items()}
def set_adapters(
self,
@@ -715,7 +719,7 @@ def set_adapters(
f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}."
)
- weights = [weight or 1.0 for weight in weights] # Set None values to default of 1.0
+ weights = [w if w is not None else 1.0 for w in weights] # Set None values to default of 1.0
blocks_with_transformer = {
"down": [i for i, block in enumerate(self.down_blocks) if hasattr(block, "attentions")],
"up": [i for i, block in enumerate(self.up_blocks) if hasattr(block, "attentions")],
From c2395fae18ca38617624e7830c6088ac2d59ad3f Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Thu, 21 Mar 2024 10:18:45 +0100
Subject: [PATCH 11/25] Adapt to new lora test structure
---
src/diffusers/loaders/unet.py | 126 +-
tests/lora/test_lora_layers_peft.py | 2550 ---------------------------
tests/lora/utils.py | 213 +++
3 files changed, 274 insertions(+), 2615 deletions(-)
delete mode 100644 tests/lora/test_lora_layers_peft.py
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index 93ff61a41801..241fa26b857f 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -563,15 +563,24 @@ def _unfuse_lora_apply(self, module):
if isinstance(module, BaseTunerLayer):
module.unmerge()
- def _expand_lora_scales_dict(self, scales: Union[float, Dict]):
+ def _expand_lora_scales_dict(
+ self,
+ scales: Union[float, Dict],
+ blocks_with_transformer: Dict[str, int],
+ transformer_per_block: Dict[str, int],
+ ):
"""
Expands the inputs into a more granular dictionary. See the example below for more details.
Parameters:
scales (`Union[float, Dict]`):
Scales dict to expand.
+ blocks_with_transformer (`Dict[str, int]`):
+ Dict with keys 'up' and 'down', showing which blocks have transformer layers
+ transformer_per_block (`Dict[str, int]`):
+ Dict with keys 'up' and 'down', showing how many transformer layers each block has
- E.g. turns todo umer
+ E.g. turns
scales = {
'down': 2,
'mid': 3,
@@ -580,7 +589,14 @@ def _expand_lora_scales_dict(self, scales: Union[float, Dict]):
'block_1': [5, 6, 7]
}
}
-
+ blocks_with_transformer = {
+ 'down': [1,2],
+ 'up': [0,1]
+ }
+ transformer_per_block = {
+ 'down': 2,
+ 'up': 3
+ }
into
{
'down.block_1.0': 2,
@@ -596,83 +612,63 @@ def _expand_lora_scales_dict(self, scales: Union[float, Dict]):
'up.block_1.2': 7,
}
"""
+ if sorted(blocks_with_transformer.keys()) != ["down", "up"]:
+ raise ValueError("blocks_with_transformer needs to be a dict with keys `'down' and `'up'`")
+
+ if sorted(transformer_per_block.keys()) != ["down", "up"]:
+ raise ValueError("transformer_per_block needs to be a dict with keys `'down' and `'up'`")
+
scales = copy.deepcopy(scales)
- def expand_and_set_defaults(inner_dict, keys):
- if not isinstance(inner_dict, dict):
- inner_dict = {o: inner_dict for o in keys}
- for o in keys:
- inner_dict.setdefault(o, 1.0)
- return inner_dict
+ if not isinstance(scales, dict):
+ scales = {o: scales for o in ["down", "mid", "up"]}
- # top level
- scales = expand_and_set_defaults(scales, ["down", "mid", "up"])
+ if "mid" not in scales:
+ scales["mid"] = 1
- # down part, block level
- keys = [f"block_{i}" for i in range(len(self.down_blocks))]
- scales["down"] = expand_and_set_defaults(scales["down"], keys)
+ for updown in ["up", "down"]:
+ if updown not in scales:
+ scales[updown] = 1
- # up part, block level
- keys = [f"block_{i}" for i in range(len(self.up_blocks))]
- scales["up"] = expand_and_set_defaults(scales["up"], keys)
+ # eg {"down": 1} to {"down": {"block_1": 1, "block_2": 1}}}
+ if not isinstance(scales[updown], dict):
+ scales[updown] = {f"block_{i}": scales[updown] for i in blocks_with_transformer[updown]}
- # mid part, layer level
- mid_resnets = len(self.mid_block.resnets)
- keys = [f"resnets_{i}" for i in range(mid_resnets)] + [f"attention_{i}" for i in range(mid_resnets - 1)]
- scales["mid"] = expand_and_set_defaults(scales["mid"], keys)
+ # eg {"down": "block_1": 1}} to {"down": "block_1": [1, 1]}}
+ for i in blocks_with_transformer[updown]:
+ block = f"block_{i}"
+ if not isinstance(scales[updown][block], list):
+ scales[updown][block] = [scales[updown][block] for _ in range(transformer_per_block[updown])]
- # down/up part, layer level
- for updown in ["up", "down"]:
- blocks = self.down_blocks if updown == "down" else self.up_blocks
- updownsampler_name = "downsampler" if updown == "down" else "upsampler"
-
- for i, block in enumerate(blocks):
- block_name = f"block_{i}"
- n_res = len(block.resnets)
- has_attns = hasattr(block, "attentions") and getattr(block, "attentions") is not None
- has_updown = hasattr(block, updownsampler_name) and getattr(block, updownsampler_name) is not None
-
- keys = [f"resnets_{i}" for i in range(n_res)]
- if has_attns:
- keys += [f"attention_{i}" for i in range(n_res)]
- if has_updown:
- keys += ["downsampler"] if updown == "down" else ["upsampler"]
-
- scales[updown][block_name] = expand_and_set_defaults(scales[updown][block_name], keys)
-
- def flatten_dict(d, parent_key="", sep="."):
- items = []
- for k, v in d.items():
- new_key = f"{parent_key}{sep}{k}" if parent_key else k
- if isinstance(v, dict):
- items.extend(flatten_dict(v, new_key, sep=sep).items())
- else:
- items.append((new_key, v))
- return dict(items)
+ # eg {"down": "block_1": [1, 1]}} to {"down.block_1.0": 1, "down.block_1.1": 1}
+ for i in blocks_with_transformer[updown]:
+ block = f"block_{i}"
+ for tf_idx, value in enumerate(scales[updown][block]):
+ scales[f"{updown}.{block}.{tf_idx}"] = value
- def module_name(name):
- """Translate user-friendly name (e.g. 'down.block_0.resnet_0') into actual module name (e.g. 'down_blocks.0.resnets.0')"""
+ del scales[updown]
- if "mid" in name:
- upmiddown, module = name.split(".")
- block = ""
- else:
- upmiddown, block, module = name.split(".")
+ def layer_name(name):
+ """Translate user-friendly name (e.g. 'mid') into actual layer name (e.g. 'mid_block.attentions.0')"""
+ if name == "mid":
+ return "mid_block.attentions.0"
- upmiddown = upmiddown.replace("down", "down_blocks").replace("mid", "mid_block").replace("up", "up_blocks")
- block = ("." + block.replace("block_", "")) if block != "" else ""
- module = "." + module.replace("_", ".")
+ updown, block, attn = name.split(".")
- return upmiddown + block + module
+ updown = updown.replace("down", "down_blocks").replace("up", "up_blocks")
+ block = block.replace("block_", "")
+ attn = "attentions." + attn
- scales = flatten_dict(scales)
+ return ".".join((updown, block, attn))
state_dict = self.state_dict()
- for module in scales.keys():
- if not any(module_name(module) in module_ for module_ in state_dict.keys()):
- raise ValueError(f"Can't set lora scale for module {module}. It doesn't exist in this unet.")
+ for layer in scales.keys():
+ if not any(layer_name(layer) in module for module in state_dict.keys()):
+ raise ValueError(
+ f"Can't set lora scale for layer {layer}. It either doesn't exist in this unet or it has no attentions."
+ )
- return {module_name(k): v for k, v in flatten_dict(scales).items()}
+ return {layer_name(name): weight for name, weight in scales.items()}
def set_adapters(
self,
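For reference, a standalone sketch of the restored block-scale expansion, reproducing the docstring example above (an illustrative helper, not the diffusers method itself):

```python
# Expand coarse per-part scales into one scale per transformer layer.
import copy
from typing import Dict, List, Union


def expand_lora_scales(
    scales: Union[float, Dict],
    blocks_with_transformer: Dict[str, List[int]],
    transformer_per_block: Dict[str, int],
) -> Dict[str, float]:
    scales = copy.deepcopy(scales)
    if not isinstance(scales, dict):
        scales = {o: scales for o in ["down", "mid", "up"]}
    scales.setdefault("mid", 1)
    for updown in ["up", "down"]:
        scales.setdefault(updown, 1)
        # e.g. {"down": 1} -> {"down": {"block_1": 1, "block_2": 1}}
        if not isinstance(scales[updown], dict):
            scales[updown] = {f"block_{i}": scales[updown] for i in blocks_with_transformer[updown]}
        # e.g. {"down": {"block_1": 1}} -> {"down": {"block_1": [1, 1]}}
        for i in blocks_with_transformer[updown]:
            block = f"block_{i}"
            if not isinstance(scales[updown][block], list):
                scales[updown][block] = [scales[updown][block]] * transformer_per_block[updown]
        # flatten, e.g. {"down": {"block_1": [1, 1]}} -> {"down.block_1.0": 1, "down.block_1.1": 1}
        for i in blocks_with_transformer[updown]:
            block = f"block_{i}"
            for tf_idx, value in enumerate(scales[updown][block]):
                scales[f"{updown}.{block}.{tf_idx}"] = value
        del scales[updown]
    return scales


print(
    expand_lora_scales(
        {"down": 2, "mid": 3, "up": {"block_0": 4, "block_1": [5, 6, 7]}},
        blocks_with_transformer={"down": [1, 2], "up": [0, 1]},
        transformer_per_block={"down": 2, "up": 3},
    )
)
# {'mid': 3,
#  'up.block_0.0': 4, 'up.block_0.1': 4, 'up.block_0.2': 4,
#  'up.block_1.0': 5, 'up.block_1.1': 6, 'up.block_1.2': 7,
#  'down.block_1.0': 2, 'down.block_1.1': 2, 'down.block_2.0': 2, 'down.block_2.1': 2}
```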
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
deleted file mode 100644
index 8d2094131267..000000000000
--- a/tests/lora/test_lora_layers_peft.py
+++ /dev/null
@@ -1,2550 +0,0 @@
-# coding=utf-8
-# Copyright 2024 HuggingFace Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import copy
-import gc
-import importlib
-import os
-import tempfile
-import time
-import unittest
-from itertools import product
-
-import numpy as np
-import torch
-import torch.nn as nn
-from huggingface_hub import hf_hub_download
-from huggingface_hub.repocard import RepoCard
-from packaging import version
-from safetensors.torch import load_file
-from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
-
-from diffusers import (
- AutoencoderKL,
- AutoPipelineForImage2Image,
- AutoPipelineForText2Image,
- ControlNetModel,
- DDIMScheduler,
- DiffusionPipeline,
- EulerDiscreteScheduler,
- LCMScheduler,
- StableDiffusionPipeline,
- StableDiffusionXLAdapterPipeline,
- StableDiffusionXLControlNetPipeline,
- StableDiffusionXLPipeline,
- T2IAdapter,
- UNet2DConditionModel,
-)
-from diffusers.utils.import_utils import is_accelerate_available, is_peft_available
-from diffusers.utils.testing_utils import (
- floats_tensor,
- load_image,
- nightly,
- numpy_cosine_similarity_distance,
- require_peft_backend,
- require_peft_version_greater,
- require_torch_gpu,
- slow,
- torch_device,
-)
-
-
-if is_accelerate_available():
- from accelerate.utils import release_memory
-
-if is_peft_available():
- from peft import LoraConfig
- from peft.tuners.tuners_utils import BaseTunerLayer
- from peft.utils import get_peft_model_state_dict
-
-
-def state_dicts_almost_equal(sd1, sd2):
- sd1 = dict(sorted(sd1.items()))
- sd2 = dict(sorted(sd2.items()))
-
- models_are_equal = True
- for ten1, ten2 in zip(sd1.values(), sd2.values()):
- if (ten1 - ten2).abs().max() > 1e-3:
- models_are_equal = False
-
- return models_are_equal
-
-
-@require_peft_backend
-class PeftLoraLoaderMixinTests:
- torch_device = "cuda" if torch.cuda.is_available() else "cpu"
- pipeline_class = None
- scheduler_cls = None
- scheduler_kwargs = None
- has_two_text_encoders = False
- unet_kwargs = None
- vae_kwargs = None
-
- def get_dummy_components(self, scheduler_cls=None):
- scheduler_cls = self.scheduler_cls if scheduler_cls is None else LCMScheduler
- rank = 4
-
- torch.manual_seed(0)
- unet = UNet2DConditionModel(**self.unet_kwargs)
-
- scheduler = scheduler_cls(**self.scheduler_kwargs)
-
- torch.manual_seed(0)
- vae = AutoencoderKL(**self.vae_kwargs)
-
- text_encoder = CLIPTextModel.from_pretrained("peft-internal-testing/tiny-clip-text-2")
- tokenizer = CLIPTokenizer.from_pretrained("peft-internal-testing/tiny-clip-text-2")
-
- if self.has_two_text_encoders:
- text_encoder_2 = CLIPTextModelWithProjection.from_pretrained("peft-internal-testing/tiny-clip-text-2")
- tokenizer_2 = CLIPTokenizer.from_pretrained("peft-internal-testing/tiny-clip-text-2")
-
- text_lora_config = LoraConfig(
- r=rank,
- lora_alpha=rank,
- target_modules=["q_proj", "k_proj", "v_proj", "out_proj"],
- init_lora_weights=False,
- )
-
- unet_lora_config = LoraConfig(
- r=rank, lora_alpha=rank, target_modules=["to_q", "to_k", "to_v", "to_out.0"], init_lora_weights=False
- )
-
- if self.has_two_text_encoders:
- pipeline_components = {
- "unet": unet,
- "scheduler": scheduler,
- "vae": vae,
- "text_encoder": text_encoder,
- "tokenizer": tokenizer,
- "text_encoder_2": text_encoder_2,
- "tokenizer_2": tokenizer_2,
- "image_encoder": None,
- "feature_extractor": None,
- }
- else:
- pipeline_components = {
- "unet": unet,
- "scheduler": scheduler,
- "vae": vae,
- "text_encoder": text_encoder,
- "tokenizer": tokenizer,
- "safety_checker": None,
- "feature_extractor": None,
- "image_encoder": None,
- }
-
- return pipeline_components, text_lora_config, unet_lora_config
-
- def get_dummy_inputs(self, with_generator=True):
- batch_size = 1
- sequence_length = 10
- num_channels = 4
- sizes = (32, 32)
-
- generator = torch.manual_seed(0)
- noise = floats_tensor((batch_size, num_channels) + sizes)
- input_ids = torch.randint(1, sequence_length, size=(batch_size, sequence_length), generator=generator)
-
- pipeline_inputs = {
- "prompt": "A painting of a squirrel eating a burger",
- "num_inference_steps": 5,
- "guidance_scale": 6.0,
- "output_type": "np",
- }
- if with_generator:
- pipeline_inputs.update({"generator": generator})
-
- return noise, input_ids, pipeline_inputs
-
- # copied from: https://colab.research.google.com/gist/sayakpaul/df2ef6e1ae6d8c10a49d859883b10860/scratchpad.ipynb
- def get_dummy_tokens(self):
- max_seq_length = 77
-
- inputs = torch.randint(2, 56, size=(1, max_seq_length), generator=torch.manual_seed(0))
-
- prepared_inputs = {}
- prepared_inputs["input_ids"] = inputs
- return prepared_inputs
-
- def check_if_lora_correctly_set(self, model) -> bool:
- """
- Checks if the LoRA layers are correctly set with peft
- """
- for module in model.modules():
- if isinstance(module, BaseTunerLayer):
- return True
- return False
-
- def test_simple_inference(self):
- """
- Tests a simple inference and makes sure it works as expected
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
-
- _, _, inputs = self.get_dummy_inputs()
- output_no_lora = pipe(**inputs).images
- self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
-
- def test_simple_inference_with_text_lora(self):
- """
- Tests a simple inference with lora attached on the text encoder
- and makes sure it works as expected
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
-
- pipe.text_encoder.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(
- not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
- )
-
- def test_simple_inference_with_text_lora_and_scale(self):
- """
- Tests a simple inference with lora attached on the text encoder + scale argument
- and makes sure it works as expected
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
-
- pipe.text_encoder.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(
- not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
- )
-
- output_lora_scale = pipe(
- **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.5}
- ).images
- self.assertTrue(
- not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3),
- "Lora + scale should change the output",
- )
-
- output_lora_0_scale = pipe(
- **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.0}
- ).images
- self.assertTrue(
- np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3),
- "Lora + 0 scale should lead to same result as no LoRA",
- )
-
- def test_simple_inference_with_text_lora_fused(self):
- """
- Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model
- and makes sure it works as expected
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
-
- pipe.text_encoder.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- pipe.fuse_lora()
- # Fusing should still keep the LoRA layers
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
-
- if self.has_two_text_encoders:
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- ouput_fused = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertFalse(
- np.allclose(ouput_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output"
- )
-
- def test_simple_inference_with_text_lora_unloaded(self):
- """
- Tests a simple inference with lora attached to text encoder, then unloads the lora weights
- and makes sure it works as expected
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
-
- pipe.text_encoder.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- pipe.unload_lora_weights()
- # unloading should remove the LoRA layers
- self.assertFalse(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder"
- )
-
- if self.has_two_text_encoders:
- self.assertFalse(
- self.check_if_lora_correctly_set(pipe.text_encoder_2),
- "Lora not correctly unloaded in text encoder 2",
- )
-
- ouput_unloaded = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(
- np.allclose(ouput_unloaded, output_no_lora, atol=1e-3, rtol=1e-3),
- "Fused lora should change the output",
- )
-
- def test_simple_inference_with_text_lora_save_load(self):
- """
- Tests a simple usecase where users could use saving utilities for LoRA.
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
-
- pipe.text_encoder.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- with tempfile.TemporaryDirectory() as tmpdirname:
- text_encoder_state_dict = get_peft_model_state_dict(pipe.text_encoder)
- if self.has_two_text_encoders:
- text_encoder_2_state_dict = get_peft_model_state_dict(pipe.text_encoder_2)
-
- self.pipeline_class.save_lora_weights(
- save_directory=tmpdirname,
- text_encoder_lora_layers=text_encoder_state_dict,
- text_encoder_2_lora_layers=text_encoder_2_state_dict,
- safe_serialization=False,
- )
- else:
- self.pipeline_class.save_lora_weights(
- save_directory=tmpdirname,
- text_encoder_lora_layers=text_encoder_state_dict,
- safe_serialization=False,
- )
-
- self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin")))
- pipe.unload_lora_weights()
-
- pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))
-
- images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
-
- if self.has_two_text_encoders:
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- self.assertTrue(
- np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3),
- "Loading from saved checkpoints should give same results.",
- )
-
- def test_simple_inference_save_pretrained(self):
- """
- Tests a simple usecase where users could use saving utilities for LoRA through save_pretrained
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
-
- pipe.text_encoder.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- with tempfile.TemporaryDirectory() as tmpdirname:
- pipe.save_pretrained(tmpdirname)
-
- pipe_from_pretrained = self.pipeline_class.from_pretrained(tmpdirname)
- pipe_from_pretrained.to(self.torch_device)
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe_from_pretrained.text_encoder),
- "Lora not correctly set in text encoder",
- )
-
- if self.has_two_text_encoders:
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe_from_pretrained.text_encoder_2),
- "Lora not correctly set in text encoder 2",
- )
-
- images_lora_save_pretrained = pipe_from_pretrained(**inputs, generator=torch.manual_seed(0)).images
-
- self.assertTrue(
- np.allclose(images_lora, images_lora_save_pretrained, atol=1e-3, rtol=1e-3),
- "Loading from saved checkpoints should give same results.",
- )
-
- def test_simple_inference_with_text_unet_lora_save_load(self):
- """
- Tests a simple usecase where users could use saving utilities for LoRA for Unet + text encoder
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
-
- pipe.text_encoder.add_adapter(text_lora_config)
- pipe.unet.add_adapter(unet_lora_config)
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- with tempfile.TemporaryDirectory() as tmpdirname:
- text_encoder_state_dict = get_peft_model_state_dict(pipe.text_encoder)
- unet_state_dict = get_peft_model_state_dict(pipe.unet)
- if self.has_two_text_encoders:
- text_encoder_2_state_dict = get_peft_model_state_dict(pipe.text_encoder_2)
-
- self.pipeline_class.save_lora_weights(
- save_directory=tmpdirname,
- text_encoder_lora_layers=text_encoder_state_dict,
- text_encoder_2_lora_layers=text_encoder_2_state_dict,
- unet_lora_layers=unet_state_dict,
- safe_serialization=False,
- )
- else:
- self.pipeline_class.save_lora_weights(
- save_directory=tmpdirname,
- text_encoder_lora_layers=text_encoder_state_dict,
- unet_lora_layers=unet_state_dict,
- safe_serialization=False,
- )
-
- self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin")))
- pipe.unload_lora_weights()
-
- pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))
-
- images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- self.assertTrue(
- np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3),
- "Loading from saved checkpoints should give same results.",
- )
-
- def test_simple_inference_with_text_unet_lora_and_scale(self):
- """
- Tests a simple inference with lora attached on the text encoder + Unet + scale argument
- and makes sure it works as expected
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
-
- pipe.text_encoder.add_adapter(text_lora_config)
- pipe.unet.add_adapter(unet_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(
- not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
- )
-
- output_lora_scale = pipe(
- **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.5}
- ).images
- self.assertTrue(
- not np.allclose(output_lora, output_lora_scale, atol=1e-4, rtol=1e-4),
- "Lora + scale should change the output",
- )
-
- output_lora_0_scale = pipe(
- **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.0}
- ).images
- self.assertTrue(
- np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3),
- "Lora + 0 scale should lead to same result as no LoRA",
- )
-
- self.assertTrue(
- pipe.text_encoder.text_model.encoder.layers[0].self_attn.q_proj.scaling["default"] == 1.0,
- "The scaling parameter has not been correctly restored!",
- )
-
- def test_simple_inference_with_text_lora_unet_fused(self):
- """
- Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model
- and makes sure it works as expected - with unet
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
-
- pipe.text_encoder.add_adapter(text_lora_config)
- pipe.unet.add_adapter(unet_lora_config)
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- pipe.fuse_lora()
- # Fusing should still keep the LoRA layers
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in unet")
-
- if self.has_two_text_encoders:
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- ouput_fused = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertFalse(
- np.allclose(ouput_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output"
- )
-
- def test_simple_inference_with_text_unet_lora_unloaded(self):
- """
- Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights
- and makes sure it works as expected
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
-
- pipe.text_encoder.add_adapter(text_lora_config)
- pipe.unet.add_adapter(unet_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- pipe.unload_lora_weights()
- # unloading should remove the LoRA layers
- self.assertFalse(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder"
- )
- self.assertFalse(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly unloaded in Unet")
-
- if self.has_two_text_encoders:
- self.assertFalse(
- self.check_if_lora_correctly_set(pipe.text_encoder_2),
- "Lora not correctly unloaded in text encoder 2",
- )
-
- ouput_unloaded = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(
- np.allclose(ouput_unloaded, output_no_lora, atol=1e-3, rtol=1e-3),
- "Fused lora should change the output",
- )
-
- def test_simple_inference_with_text_unet_lora_unfused(self):
- """
- Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights
- and makes sure it works as expected
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- pipe.text_encoder.add_adapter(text_lora_config)
- pipe.unet.add_adapter(unet_lora_config)
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- pipe.fuse_lora()
-
- output_fused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.unfuse_lora()
-
- output_unfused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- # unloading should remove the LoRA layers
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Unfuse should still keep LoRA layers")
-
- if self.has_two_text_encoders:
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Unfuse should still keep LoRA layers"
- )
-
- # Fuse and unfuse should lead to the same results
- self.assertTrue(
- np.allclose(output_fused_lora, output_unfused_lora, atol=1e-3, rtol=1e-3),
- "Fused lora should change the output",
- )
-
- def test_simple_inference_with_text_unet_multi_adapter(self):
- """
- Tests a simple inference with lora attached to text encoder and unet, attaches
- multiple adapters and set them
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
-
- pipe.unet.add_adapter(unet_lora_config, "adapter-1")
- pipe.unet.add_adapter(unet_lora_config, "adapter-2")
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
- pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- pipe.set_adapters("adapter-1")
-
- output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.set_adapters("adapter-2")
- output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.set_adapters(["adapter-1", "adapter-2"])
-
- output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- # Fuse and unfuse should lead to the same results
- self.assertFalse(
- np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3),
- "Adapter 1 and 2 should give different results",
- )
-
- self.assertFalse(
- np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3),
- "Adapter 1 and mixed adapters should give different results",
- )
-
- self.assertFalse(
- np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3),
- "Adapter 2 and mixed adapters should give different results",
- )
-
- pipe.disable_lora()
-
- output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- self.assertTrue(
- np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3),
- "output with no lora and output with lora disabled should give same results",
- )
-
- def test_simple_inference_with_text_unet_block_scale(self):
- """
- Tests a simple inference with lora attached to text encoder and unet, attaches
- one adapter and set differnt weights for different blocks (i.e. block lora)
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
- pipe.unet.add_adapter(unet_lora_config, "adapter-1")
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- weights_1 = {
- "unet": {
- "down": 5,
- }
- }
- pipe.set_adapters("adapter-1", weights_1)
- output_weights_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- weights_2 = {
- "unet": {
- "up": 5,
- }
- }
- pipe.set_adapters("adapter-1", weights_2)
- output_weights_2 = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- self.assertFalse(
- np.allclose(output_weights_1, output_weights_2, atol=1e-3, rtol=1e-3),
- "LoRA weights 1 and 2 should give different results",
- )
- self.assertFalse(
- np.allclose(output_no_lora, output_weights_1, atol=1e-3, rtol=1e-3),
- "No adapter and LoRA weights 1 should give different results",
- )
- self.assertFalse(
- np.allclose(output_no_lora, output_weights_2, atol=1e-3, rtol=1e-3),
- "No adapter and LoRA weights 2 should give different results",
- )
-
- pipe.disable_lora()
- output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- self.assertTrue(
- np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3),
- "output with no lora and output with lora disabled should give same results",
- )
-
- def test_simple_inference_with_text_unet_multi_adapter_block_lora(self):
- """
- Tests a simple inference with lora attached to text encoder and unet, attaches
- multiple adapters and set differnt weights for different blocks (i.e. block lora)
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
-
- pipe.unet.add_adapter(unet_lora_config, "adapter-1")
- pipe.unet.add_adapter(unet_lora_config, "adapter-2")
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
- pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- scales_1 = {"unet": {"down": 5}}
- scales_2 = {"unet": {"down": 5, "mid": 5}}
- pipe.set_adapters("adapter-1", scales_1)
-
- output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.set_adapters("adapter-2", scales_2)
- output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.set_adapters(["adapter-1", "adapter-2"], [scales_1, scales_2])
-
- output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- # Fuse and unfuse should lead to the same results
- self.assertFalse(
- np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3),
- "Adapter 1 and 2 should give different results",
- )
-
- self.assertFalse(
- np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3),
- "Adapter 1 and mixed adapters should give different results",
- )
-
- self.assertFalse(
- np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3),
- "Adapter 2 and mixed adapters should give different results",
- )
-
- pipe.disable_lora()
-
- output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- self.assertTrue(
- np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3),
- "output with no lora and output with lora disabled should give same results",
- )
-
- def test_simple_inference_with_text_unet_block_scale_for_all_dict_options(self):
- """Tests that any valid combination of lora block scales can be used in pipe.set_adapter"""
-
- def updown_options(blocks_with_tf, layers_per_block, value):
- """
- Generate every possible combination for how a lora weight dict for the up/down part can be.
- E.g. 2, {"block_1": 2}, {"block_1": [2,2,2]}, {"block_1": 2, "block_2": [2,2,2]}, ...
- """
- num_val = value
- list_val = [value] * layers_per_block
-
- node_opts = [None, num_val, list_val]
- node_opts_foreach_block = [node_opts] * len(blocks_with_tf)
-
- updown_opts = [num_val]
- for nodes in product(*node_opts_foreach_block):
- if all(n is None for n in nodes):
- continue
- opt = {}
- for b, n in zip(blocks_with_tf, nodes):
- if n is not None:
- opt["block_" + str(b)] = n
- updown_opts.append(opt)
- return updown_opts
-
- def all_possible_dict_opts(unet, value):
- """
- Generate every possible combination for how a lora weight dict can be.
- E.g. 2, {"unet: {"down": 2}}, {"unet: {"down": [2,2,2]}}, {"unet: {"mid": 2, "up": [2,2,2]}}, ...
- """
-
- down_blocks_with_tf = [i for i, d in enumerate(unet.down_blocks) if hasattr(d, "attentions")]
- up_blocks_with_tf = [i for i, u in enumerate(unet.up_blocks) if hasattr(u, "attentions")]
-
- layers_per_block = unet.config.layers_per_block
-
- text_encoder_opts = [None, value]
- text_encoder_2_opts = [None, value]
- mid_opts = [None, value]
- down_opts = [None] + updown_options(down_blocks_with_tf, layers_per_block, value)
- up_opts = [None] + updown_options(up_blocks_with_tf, layers_per_block + 1, value)
-
- opts = []
-
- for t1, t2, d, m, u in product(text_encoder_opts, text_encoder_2_opts, down_opts, mid_opts, up_opts):
- if all(o is None for o in (t1, t2, d, m, u)):
- continue
- opt = {}
- if t1 is not None:
- opt["text_encoder"] = t1
- if t2 is not None:
- opt["text_encoder_2"] = t2
- if all(o is None for o in (d, m, u)):
- # no unet scaling
- continue
- opt["unet"] = {}
- if d is not None:
- opt["unet"]["down"] = d
- if m is not None:
- opt["unet"]["mid"] = m
- if u is not None:
- opt["unet"]["up"] = u
- opts.append(opt)
-
- return opts
-
- components, text_lora_config, unet_lora_config = self.get_dummy_components(self.scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
- pipe.unet.add_adapter(unet_lora_config, "adapter-1")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
-
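-        # For illustration (exact block indices depend on the dummy unet config): all_possible_dict_opts
-        # yields scale dicts such as
-        # {"text_encoder": 1234, "unet": {"down": {"block_1": [1234, 1234]}, "mid": 1234, "up": 1234}},
-        # covering every way the text encoders and the unet's down/mid/up blocks can be scaled.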
- for scale_dict in all_possible_dict_opts(pipe.unet, value=1234):
- # test if lora block scales can be set with this scale_dict
- if not self.has_two_text_encoders and "text_encoder_2" in scale_dict:
- del scale_dict["text_encoder_2"]
-
- pipe.set_adapters("adapter-1", scale_dict)
-
- def test_simple_inference_with_text_unet_multi_adapter_delete_adapter(self):
- """
- Tests a simple inference with lora attached to text encoder and unet, attaches
-        multiple adapters and sets/deletes them
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
-
- pipe.unet.add_adapter(unet_lora_config, "adapter-1")
- pipe.unet.add_adapter(unet_lora_config, "adapter-2")
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
- pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- pipe.set_adapters("adapter-1")
-
- output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.set_adapters("adapter-2")
- output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.set_adapters(["adapter-1", "adapter-2"])
-
- output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- self.assertFalse(
- np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3),
- "Adapter 1 and 2 should give different results",
- )
-
- self.assertFalse(
- np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3),
- "Adapter 1 and mixed adapters should give different results",
- )
-
- self.assertFalse(
- np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3),
- "Adapter 2 and mixed adapters should give different results",
- )
-
- pipe.delete_adapters("adapter-1")
- output_deleted_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- self.assertTrue(
- np.allclose(output_deleted_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3),
-                "After deleting adapter 1, the output should match the adapter-2-only output",
- )
-
- pipe.delete_adapters("adapter-2")
- output_deleted_adapters = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- self.assertTrue(
- np.allclose(output_no_lora, output_deleted_adapters, atol=1e-3, rtol=1e-3),
-                "output with no lora and output with all adapters deleted should give same results",
- )
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
-
- pipe.unet.add_adapter(unet_lora_config, "adapter-1")
- pipe.unet.add_adapter(unet_lora_config, "adapter-2")
-
- pipe.set_adapters(["adapter-1", "adapter-2"])
- pipe.delete_adapters(["adapter-1", "adapter-2"])
-
- output_deleted_adapters = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- self.assertTrue(
- np.allclose(output_no_lora, output_deleted_adapters, atol=1e-3, rtol=1e-3),
-                "output with no lora and output with all adapters deleted should give same results",
- )
-
- def test_simple_inference_with_text_unet_multi_adapter_weighted(self):
- """
- Tests a simple inference with lora attached to text encoder and unet, attaches
-        multiple adapters and sets them with different weights
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
-
- pipe.unet.add_adapter(unet_lora_config, "adapter-1")
- pipe.unet.add_adapter(unet_lora_config, "adapter-2")
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
- pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- pipe.set_adapters("adapter-1")
-
- output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.set_adapters("adapter-2")
- output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.set_adapters(["adapter-1", "adapter-2"])
-
- output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images
-
-            # The individual adapters and the mixed adapters should give different results
- self.assertFalse(
- np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3),
- "Adapter 1 and 2 should give different results",
- )
-
- self.assertFalse(
- np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3),
- "Adapter 1 and mixed adapters should give different results",
- )
-
- self.assertFalse(
- np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3),
- "Adapter 2 and mixed adapters should give different results",
- )
-
- pipe.set_adapters(["adapter-1", "adapter-2"], [0.5, 0.6])
- output_adapter_mixed_weighted = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- self.assertFalse(
- np.allclose(output_adapter_mixed_weighted, output_adapter_mixed, atol=1e-3, rtol=1e-3),
- "Weighted adapter and mixed adapter should give different results",
- )
-
- pipe.disable_lora()
-
- output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- self.assertTrue(
- np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3),
- "output with no lora and output with lora disabled should give same results",
- )
-
- def test_lora_fuse_nan(self):
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
-
- pipe.unet.add_adapter(unet_lora_config, "adapter-1")
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- # corrupt one LoRA weight with `inf` values
- with torch.no_grad():
- pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_A["adapter-1"].weight += float(
- "inf"
- )
-
- # with `safe_fusing=True` we should see an Error
- with self.assertRaises(ValueError):
- pipe.fuse_lora(safe_fusing=True)
-
-            # without it, we should not see an error, but the resulting images will contain NaNs
- pipe.fuse_lora(safe_fusing=False)
-
- out = pipe("test", num_inference_steps=2, output_type="np").images
-
- self.assertTrue(np.isnan(out).all())
-
- def test_get_adapters(self):
- """
-        Tests a simple use case where we attach multiple adapters and check that
-        the active adapters are reported as expected
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
- pipe.unet.add_adapter(unet_lora_config, "adapter-1")
-
- adapter_names = pipe.get_active_adapters()
- self.assertListEqual(adapter_names, ["adapter-1"])
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
- pipe.unet.add_adapter(unet_lora_config, "adapter-2")
-
- adapter_names = pipe.get_active_adapters()
- self.assertListEqual(adapter_names, ["adapter-2"])
-
- pipe.set_adapters(["adapter-1", "adapter-2"])
- self.assertListEqual(pipe.get_active_adapters(), ["adapter-1", "adapter-2"])
-
- def test_get_list_adapters(self):
- """
-        Tests a simple use case where we attach multiple adapters and check that
-        the list of adapters per component is reported as expected
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
- pipe.unet.add_adapter(unet_lora_config, "adapter-1")
-
- adapter_names = pipe.get_list_adapters()
- self.assertDictEqual(adapter_names, {"text_encoder": ["adapter-1"], "unet": ["adapter-1"]})
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
- pipe.unet.add_adapter(unet_lora_config, "adapter-2")
-
- adapter_names = pipe.get_list_adapters()
- self.assertDictEqual(
- adapter_names, {"text_encoder": ["adapter-1", "adapter-2"], "unet": ["adapter-1", "adapter-2"]}
- )
-
- pipe.set_adapters(["adapter-1", "adapter-2"])
- self.assertDictEqual(
- pipe.get_list_adapters(),
- {"unet": ["adapter-1", "adapter-2"], "text_encoder": ["adapter-1", "adapter-2"]},
- )
-
- pipe.unet.add_adapter(unet_lora_config, "adapter-3")
- self.assertDictEqual(
- pipe.get_list_adapters(),
- {"unet": ["adapter-1", "adapter-2", "adapter-3"], "text_encoder": ["adapter-1", "adapter-2"]},
- )
-
- @require_peft_version_greater(peft_version="0.6.2")
- def test_simple_inference_with_text_lora_unet_fused_multi(self):
- """
-        Tests a simple inference with lora attached to the text encoder and unet, fuses the lora weights into
-        the base model, and makes sure it works as expected in the multi-adapter case
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
-
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
- pipe.unet.add_adapter(unet_lora_config, "adapter-1")
-
- # Attach a second adapter
- pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
- pipe.unet.add_adapter(unet_lora_config, "adapter-2")
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
- pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- # set them to multi-adapter inference mode
- pipe.set_adapters(["adapter-1", "adapter-2"])
-            outputs_all_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.set_adapters(["adapter-1"])
-            outputs_lora_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- pipe.fuse_lora(adapter_names=["adapter-1"])
-
-            # Fusing should still keep the LoRA layers so output should remain the same
- outputs_lora_1_fused = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- self.assertTrue(
-                np.allclose(outputs_lora_1, outputs_lora_1_fused, atol=1e-3, rtol=1e-3),
- "Fused lora should not change the output",
- )
-
- pipe.unfuse_lora()
- pipe.fuse_lora(adapter_names=["adapter-2", "adapter-1"])
-
- # Fusing should still keep the LoRA layers
- output_all_lora_fused = pipe(**inputs, generator=torch.manual_seed(0)).images
- self.assertTrue(
-                np.allclose(output_all_lora_fused, outputs_all_lora, atol=1e-3, rtol=1e-3),
- "Fused lora should not change the output",
- )
-
- @unittest.skip("This is failing for now - need to investigate")
- def test_simple_inference_with_text_unet_lora_unfused_torch_compile(self):
- """
-        Tests a simple inference with lora attached to text encoder and unet, compiles the models with
-        torch.compile, and makes sure inference still works as expected
- """
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _, _, inputs = self.get_dummy_inputs(with_generator=False)
-
- pipe.text_encoder.add_adapter(text_lora_config)
- pipe.unet.add_adapter(unet_lora_config)
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
- self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2.add_adapter(text_lora_config)
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
- )
-
- pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
- pipe.text_encoder = torch.compile(pipe.text_encoder, mode="reduce-overhead", fullgraph=True)
-
- if self.has_two_text_encoders:
- pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True)
-
-            # Just makes sure it works.
- _ = pipe(**inputs, generator=torch.manual_seed(0)).images
-
- def test_modify_padding_mode(self):
- def set_pad_mode(network, mode="circular"):
- for _, module in network.named_modules():
- if isinstance(module, torch.nn.Conv2d):
- module.padding_mode = mode
-
- for scheduler_cls in [DDIMScheduler, LCMScheduler]:
- components, _, _ = self.get_dummy_components(scheduler_cls)
- pipe = self.pipeline_class(**components)
- pipe = pipe.to(self.torch_device)
- pipe.set_progress_bar_config(disable=None)
- _pad_mode = "circular"
- set_pad_mode(pipe.vae, _pad_mode)
- set_pad_mode(pipe.unet, _pad_mode)
-
- _, _, inputs = self.get_dummy_inputs()
- _ = pipe(**inputs).images
-
-
-class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
- pipeline_class = StableDiffusionPipeline
- scheduler_cls = DDIMScheduler
- scheduler_kwargs = {
- "beta_start": 0.00085,
- "beta_end": 0.012,
- "beta_schedule": "scaled_linear",
- "clip_sample": False,
- "set_alpha_to_one": False,
- "steps_offset": 1,
- }
- unet_kwargs = {
- "block_out_channels": (32, 64),
- "layers_per_block": 2,
- "sample_size": 32,
- "in_channels": 4,
- "out_channels": 4,
- "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
- "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
- "cross_attention_dim": 32,
- }
- vae_kwargs = {
- "block_out_channels": [32, 64],
- "in_channels": 3,
- "out_channels": 3,
- "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
- "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
- "latent_channels": 4,
- }
-
- def tearDown(self):
- super().tearDown()
- gc.collect()
- torch.cuda.empty_cache()
-
- @slow
- @require_torch_gpu
- def test_integration_move_lora_cpu(self):
- path = "runwayml/stable-diffusion-v1-5"
- lora_id = "takuma104/lora-test-text-encoder-lora-target"
-
- pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
- pipe.load_lora_weights(lora_id, adapter_name="adapter-1")
- pipe.load_lora_weights(lora_id, adapter_name="adapter-2")
- pipe = pipe.to("cuda")
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder),
- "Lora not correctly set in text encoder",
- )
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.unet),
-            "Lora not correctly set in unet",
- )
-
-        # We will offload the first adapter to the CPU and check that the offloading
-        # has been performed correctly
- pipe.set_lora_device(["adapter-1"], "cpu")
-
- for name, module in pipe.unet.named_modules():
- if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)):
- self.assertTrue(module.weight.device == torch.device("cpu"))
- elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)):
- self.assertTrue(module.weight.device != torch.device("cpu"))
-
- for name, module in pipe.text_encoder.named_modules():
- if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)):
- self.assertTrue(module.weight.device == torch.device("cpu"))
- elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)):
- self.assertTrue(module.weight.device != torch.device("cpu"))
-
- pipe.set_lora_device(["adapter-1"], 0)
-
- for n, m in pipe.unet.named_modules():
- if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)):
- self.assertTrue(m.weight.device != torch.device("cpu"))
-
- for n, m in pipe.text_encoder.named_modules():
- if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)):
- self.assertTrue(m.weight.device != torch.device("cpu"))
-
- pipe.set_lora_device(["adapter-1", "adapter-2"], "cuda")
-
- for n, m in pipe.unet.named_modules():
- if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)):
- self.assertTrue(m.weight.device != torch.device("cpu"))
-
- for n, m in pipe.text_encoder.named_modules():
- if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)):
- self.assertTrue(m.weight.device != torch.device("cpu"))
-
- @slow
- @require_torch_gpu
- def test_integration_logits_with_scale(self):
- path = "runwayml/stable-diffusion-v1-5"
- lora_id = "takuma104/lora-test-text-encoder-lora-target"
-
- pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32)
- pipe.load_lora_weights(lora_id)
- pipe = pipe.to("cuda")
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.unet),
- "Lora not correctly set in UNet",
- )
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder),
-            "Lora not correctly set in text encoder",
- )
-
- prompt = "a red sks dog"
-
- images = pipe(
- prompt=prompt,
- num_inference_steps=15,
- cross_attention_kwargs={"scale": 0.5},
- generator=torch.manual_seed(0),
- output_type="np",
- ).images
-
- expected_slice_scale = np.array([0.307, 0.283, 0.310, 0.310, 0.300, 0.314, 0.336, 0.314, 0.321])
-
- predicted_slice = images[0, -3:, -3:, -1].flatten()
-
- self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
-
- @slow
- @require_torch_gpu
- def test_integration_logits_no_scale(self):
- path = "runwayml/stable-diffusion-v1-5"
- lora_id = "takuma104/lora-test-text-encoder-lora-target"
-
- pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32)
- pipe.load_lora_weights(lora_id)
- pipe = pipe.to("cuda")
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.text_encoder),
- "Lora not correctly set in text encoder",
- )
-
- prompt = "a red sks dog"
-
- images = pipe(prompt=prompt, num_inference_steps=30, generator=torch.manual_seed(0), output_type="np").images
-
- expected_slice_scale = np.array([0.074, 0.064, 0.073, 0.0842, 0.069, 0.0641, 0.0794, 0.076, 0.084])
-
- predicted_slice = images[0, -3:, -3:, -1].flatten()
-
- self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
-
- @nightly
- @require_torch_gpu
- def test_integration_logits_multi_adapter(self):
- path = "stabilityai/stable-diffusion-xl-base-1.0"
- lora_id = "CiroN2022/toy-face"
-
- pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16)
- pipe.load_lora_weights(lora_id, weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
- pipe = pipe.to("cuda")
-
- self.assertTrue(
- self.check_if_lora_correctly_set(pipe.unet),
- "Lora not correctly set in Unet",
- )
-
- prompt = "toy_face of a hacker with a hoodie"
-
- lora_scale = 0.9
-
- images = pipe(
- prompt=prompt,
- num_inference_steps=30,
- generator=torch.manual_seed(0),
- cross_attention_kwargs={"scale": lora_scale},
- output_type="np",
- ).images
- expected_slice_scale = np.array([0.538, 0.539, 0.540, 0.540, 0.542, 0.539, 0.538, 0.541, 0.539])
-
- predicted_slice = images[0, -3:, -3:, -1].flatten()
- self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
-
- pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
- pipe.set_adapters("pixel")
-
- prompt = "pixel art, a hacker with a hoodie, simple, flat colors"
- images = pipe(
- prompt,
- num_inference_steps=30,
- guidance_scale=7.5,
- cross_attention_kwargs={"scale": lora_scale},
- generator=torch.manual_seed(0),
- output_type="np",
- ).images
-
- predicted_slice = images[0, -3:, -3:, -1].flatten()
- expected_slice_scale = np.array(
- [0.61973065, 0.62018543, 0.62181497, 0.61933696, 0.6208608, 0.620576, 0.6200281, 0.62258327, 0.6259889]
- )
- self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
-
- # multi-adapter inference
- pipe.set_adapters(["pixel", "toy"], adapter_weights=[0.5, 1.0])
- images = pipe(
- prompt,
- num_inference_steps=30,
- guidance_scale=7.5,
- cross_attention_kwargs={"scale": 1.0},
- generator=torch.manual_seed(0),
- output_type="np",
- ).images
- predicted_slice = images[0, -3:, -3:, -1].flatten()
- expected_slice_scale = np.array([0.5888, 0.5897, 0.5946, 0.5888, 0.5935, 0.5946, 0.5857, 0.5891, 0.5909])
- self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
-
- # Lora disabled
- pipe.disable_lora()
- images = pipe(
- prompt,
- num_inference_steps=30,
- guidance_scale=7.5,
- cross_attention_kwargs={"scale": lora_scale},
- generator=torch.manual_seed(0),
- output_type="np",
- ).images
- predicted_slice = images[0, -3:, -3:, -1].flatten()
- expected_slice_scale = np.array([0.5456, 0.5466, 0.5487, 0.5458, 0.5469, 0.5454, 0.5446, 0.5479, 0.5487])
- self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
-
-
-class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
- has_two_text_encoders = True
- pipeline_class = StableDiffusionXLPipeline
- scheduler_cls = EulerDiscreteScheduler
- scheduler_kwargs = {
- "beta_start": 0.00085,
- "beta_end": 0.012,
- "beta_schedule": "scaled_linear",
- "timestep_spacing": "leading",
- "steps_offset": 1,
- }
- unet_kwargs = {
- "block_out_channels": (32, 64),
- "layers_per_block": 2,
- "sample_size": 32,
- "in_channels": 4,
- "out_channels": 4,
- "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
- "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
- "attention_head_dim": (2, 4),
- "use_linear_projection": True,
- "addition_embed_type": "text_time",
- "addition_time_embed_dim": 8,
- "transformer_layers_per_block": (1, 2),
- "projection_class_embeddings_input_dim": 80, # 6 * 8 + 32
- "cross_attention_dim": 64,
- }
- vae_kwargs = {
- "block_out_channels": [32, 64],
- "in_channels": 3,
- "out_channels": 3,
- "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
- "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
- "latent_channels": 4,
- "sample_size": 128,
- }
-
- def tearDown(self):
- super().tearDown()
- gc.collect()
- torch.cuda.empty_cache()
-
-
-@slow
-@require_torch_gpu
-class LoraIntegrationTests(PeftLoraLoaderMixinTests, unittest.TestCase):
- pipeline_class = StableDiffusionPipeline
- scheduler_cls = DDIMScheduler
- scheduler_kwargs = {
- "beta_start": 0.00085,
- "beta_end": 0.012,
- "beta_schedule": "scaled_linear",
- "clip_sample": False,
- "set_alpha_to_one": False,
- "steps_offset": 1,
- }
- unet_kwargs = {
- "block_out_channels": (32, 64),
- "layers_per_block": 2,
- "sample_size": 32,
- "in_channels": 4,
- "out_channels": 4,
- "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
- "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
- "cross_attention_dim": 32,
- }
- vae_kwargs = {
- "block_out_channels": [32, 64],
- "in_channels": 3,
- "out_channels": 3,
- "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
- "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
- "latent_channels": 4,
- }
-
- def tearDown(self):
- super().tearDown()
- gc.collect()
- torch.cuda.empty_cache()
-
- def test_dreambooth_old_format(self):
- generator = torch.Generator("cpu").manual_seed(0)
-
- lora_model_id = "hf-internal-testing/lora_dreambooth_dog_example"
- card = RepoCard.load(lora_model_id)
- base_model_id = card.data.to_dict()["base_model"]
-
- pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
- pipe = pipe.to(torch_device)
- pipe.load_lora_weights(lora_model_id)
-
- images = pipe(
- "A photo of a sks dog floating in the river", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- images = images[0, -3:, -3:, -1].flatten()
-
- expected = np.array([0.7207, 0.6787, 0.6010, 0.7478, 0.6838, 0.6064, 0.6984, 0.6443, 0.5785])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-4))
- release_memory(pipe)
-
- def test_dreambooth_text_encoder_new_format(self):
- generator = torch.Generator().manual_seed(0)
-
- lora_model_id = "hf-internal-testing/lora-trained"
- card = RepoCard.load(lora_model_id)
- base_model_id = card.data.to_dict()["base_model"]
-
- pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
- pipe = pipe.to(torch_device)
- pipe.load_lora_weights(lora_model_id)
-
- images = pipe("A photo of a sks dog", output_type="np", generator=generator, num_inference_steps=2).images
-
- images = images[0, -3:, -3:, -1].flatten()
-
- expected = np.array([0.6628, 0.6138, 0.5390, 0.6625, 0.6130, 0.5463, 0.6166, 0.5788, 0.5359])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-4))
- release_memory(pipe)
-
- def test_a1111(self):
- generator = torch.Generator().manual_seed(0)
-
- pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None).to(
- torch_device
- )
- lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
- lora_filename = "light_and_shadow.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- images = images[0, -3:, -3:, -1].flatten()
- expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-3))
- release_memory(pipe)
-
- def test_lycoris(self):
- generator = torch.Generator().manual_seed(0)
-
- pipe = StableDiffusionPipeline.from_pretrained(
- "hf-internal-testing/Amixx", safety_checker=None, use_safetensors=True, variant="fp16"
- ).to(torch_device)
- lora_model_id = "hf-internal-testing/edgLycorisMugler-light"
- lora_filename = "edgLycorisMugler-light.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- images = images[0, -3:, -3:, -1].flatten()
- expected = np.array([0.6463, 0.658, 0.599, 0.6542, 0.6512, 0.6213, 0.658, 0.6485, 0.6017])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-3))
- release_memory(pipe)
-
- def test_a1111_with_model_cpu_offload(self):
- generator = torch.Generator().manual_seed(0)
-
- pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
- pipe.enable_model_cpu_offload()
- lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
- lora_filename = "light_and_shadow.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- images = images[0, -3:, -3:, -1].flatten()
- expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-3))
- release_memory(pipe)
-
- def test_a1111_with_sequential_cpu_offload(self):
- generator = torch.Generator().manual_seed(0)
-
- pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
- pipe.enable_sequential_cpu_offload()
- lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
- lora_filename = "light_and_shadow.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- images = images[0, -3:, -3:, -1].flatten()
- expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-3))
- release_memory(pipe)
-
- def test_kohya_sd_v15_with_higher_dimensions(self):
- generator = torch.Generator().manual_seed(0)
-
- pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to(
- torch_device
- )
- lora_model_id = "hf-internal-testing/urushisato-lora"
- lora_filename = "urushisato_v15.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- images = images[0, -3:, -3:, -1].flatten()
- expected = np.array([0.7165, 0.6616, 0.5833, 0.7504, 0.6718, 0.587, 0.6871, 0.6361, 0.5694])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-3))
- release_memory(pipe)
-
-    def test_vanilla_finetuning(self):
- generator = torch.Generator().manual_seed(0)
-
- lora_model_id = "hf-internal-testing/sd-model-finetuned-lora-t4"
- card = RepoCard.load(lora_model_id)
- base_model_id = card.data.to_dict()["base_model"]
-
- pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
- pipe = pipe.to(torch_device)
- pipe.load_lora_weights(lora_model_id)
-
- images = pipe("A pokemon with blue eyes.", output_type="np", generator=generator, num_inference_steps=2).images
-
- images = images[0, -3:, -3:, -1].flatten()
-
- expected = np.array([0.7406, 0.699, 0.5963, 0.7493, 0.7045, 0.6096, 0.6886, 0.6388, 0.583])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-4))
- release_memory(pipe)
-
- def test_unload_kohya_lora(self):
- generator = torch.manual_seed(0)
- prompt = "masterpiece, best quality, mountain"
- num_inference_steps = 2
-
- pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to(
- torch_device
- )
- initial_images = pipe(
- prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
- ).images
- initial_images = initial_images[0, -3:, -3:, -1].flatten()
-
- lora_model_id = "hf-internal-testing/civitai-colored-icons-lora"
- lora_filename = "Colored_Icons_by_vizsumit.safetensors"
-
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
- generator = torch.manual_seed(0)
- lora_images = pipe(
- prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
- ).images
- lora_images = lora_images[0, -3:, -3:, -1].flatten()
-
- pipe.unload_lora_weights()
- generator = torch.manual_seed(0)
- unloaded_lora_images = pipe(
- prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
- ).images
- unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten()
-
- self.assertFalse(np.allclose(initial_images, lora_images))
- self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3))
- release_memory(pipe)
-
- def test_load_unload_load_kohya_lora(self):
- # This test ensures that a Kohya-style LoRA can be safely unloaded and then loaded
- # without introducing any side-effects. Even though the test uses a Kohya-style
- # LoRA, the underlying adapter handling mechanism is format-agnostic.
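-        # For illustration (hypothetical key names): Kohya-style state dicts use keys such as
-        # "lora_unet_down_blocks_0_attentions_0_proj_in.lora_down.weight", which diffusers converts
-        # into its own "unet.down_blocks.0.attentions.0.proj_in" LoRA layout before loading.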
- generator = torch.manual_seed(0)
- prompt = "masterpiece, best quality, mountain"
- num_inference_steps = 2
-
- pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to(
- torch_device
- )
- initial_images = pipe(
- prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
- ).images
- initial_images = initial_images[0, -3:, -3:, -1].flatten()
-
- lora_model_id = "hf-internal-testing/civitai-colored-icons-lora"
- lora_filename = "Colored_Icons_by_vizsumit.safetensors"
-
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
- generator = torch.manual_seed(0)
- lora_images = pipe(
- prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
- ).images
- lora_images = lora_images[0, -3:, -3:, -1].flatten()
-
- pipe.unload_lora_weights()
- generator = torch.manual_seed(0)
- unloaded_lora_images = pipe(
- prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
- ).images
- unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten()
-
- self.assertFalse(np.allclose(initial_images, lora_images))
- self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3))
-
- # make sure we can load a LoRA again after unloading and they don't have
- # any undesired effects.
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
- generator = torch.manual_seed(0)
- lora_images_again = pipe(
- prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
- ).images
- lora_images_again = lora_images_again[0, -3:, -3:, -1].flatten()
-
- self.assertTrue(np.allclose(lora_images, lora_images_again, atol=1e-3))
- release_memory(pipe)
-
- def test_not_empty_state_dict(self):
- # Makes sure https://github.com/huggingface/diffusers/issues/7054 does not happen again
- pipe = AutoPipelineForText2Image.from_pretrained(
- "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
- ).to("cuda")
- pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-
- cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
- lcm_lora = load_file(cached_file)
-
- pipe.load_lora_weights(lcm_lora, adapter_name="lcm")
- self.assertTrue(lcm_lora != {})
- release_memory(pipe)
-
- def test_load_unload_load_state_dict(self):
- # Makes sure https://github.com/huggingface/diffusers/issues/7054 does not happen again
- pipe = AutoPipelineForText2Image.from_pretrained(
- "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
- ).to("cuda")
- pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-
- cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
- lcm_lora = load_file(cached_file)
- previous_state_dict = lcm_lora.copy()
-
- pipe.load_lora_weights(lcm_lora, adapter_name="lcm")
- self.assertDictEqual(lcm_lora, previous_state_dict)
-
- pipe.unload_lora_weights()
- pipe.load_lora_weights(lcm_lora, adapter_name="lcm")
- self.assertDictEqual(lcm_lora, previous_state_dict)
-
- release_memory(pipe)
-
-
-@slow
-@require_torch_gpu
-class LoraSDXLIntegrationTests(PeftLoraLoaderMixinTests, unittest.TestCase):
- has_two_text_encoders = True
- pipeline_class = StableDiffusionXLPipeline
- scheduler_cls = EulerDiscreteScheduler
- scheduler_kwargs = {
- "beta_start": 0.00085,
- "beta_end": 0.012,
- "beta_schedule": "scaled_linear",
- "timestep_spacing": "leading",
- "steps_offset": 1,
- }
- unet_kwargs = {
- "block_out_channels": (32, 64),
- "layers_per_block": 2,
- "sample_size": 32,
- "in_channels": 4,
- "out_channels": 4,
- "down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
- "up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
- "attention_head_dim": (2, 4),
- "use_linear_projection": True,
- "addition_embed_type": "text_time",
- "addition_time_embed_dim": 8,
- "transformer_layers_per_block": (1, 2),
- "projection_class_embeddings_input_dim": 80, # 6 * 8 + 32
- "cross_attention_dim": 64,
- }
- vae_kwargs = {
- "block_out_channels": [32, 64],
- "in_channels": 3,
- "out_channels": 3,
- "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
- "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
- "latent_channels": 4,
- "sample_size": 128,
- }
-
- def tearDown(self):
- super().tearDown()
- gc.collect()
- torch.cuda.empty_cache()
-
- def test_sdxl_0_9_lora_one(self):
- generator = torch.Generator().manual_seed(0)
-
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9")
- lora_model_id = "hf-internal-testing/sdxl-0.9-daiton-lora"
- lora_filename = "daiton-xl-lora-test.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
- pipe.enable_model_cpu_offload()
-
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- images = images[0, -3:, -3:, -1].flatten()
- expected = np.array([0.3838, 0.3482, 0.3588, 0.3162, 0.319, 0.3369, 0.338, 0.3366, 0.3213])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-3))
- release_memory(pipe)
-
- def test_sdxl_0_9_lora_two(self):
- generator = torch.Generator().manual_seed(0)
-
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9")
- lora_model_id = "hf-internal-testing/sdxl-0.9-costumes-lora"
- lora_filename = "saijo.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
- pipe.enable_model_cpu_offload()
-
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- images = images[0, -3:, -3:, -1].flatten()
- expected = np.array([0.3137, 0.3269, 0.3355, 0.255, 0.2577, 0.2563, 0.2679, 0.2758, 0.2626])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-3))
- release_memory(pipe)
-
- def test_sdxl_0_9_lora_three(self):
- generator = torch.Generator().manual_seed(0)
-
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9")
- lora_model_id = "hf-internal-testing/sdxl-0.9-kamepan-lora"
- lora_filename = "kame_sdxl_v2-000020-16rank.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
- pipe.enable_model_cpu_offload()
-
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- images = images[0, -3:, -3:, -1].flatten()
- expected = np.array([0.4015, 0.3761, 0.3616, 0.3745, 0.3462, 0.3337, 0.3564, 0.3649, 0.3468])
-
- self.assertTrue(np.allclose(images, expected, atol=5e-3))
- release_memory(pipe)
-
- def test_sdxl_1_0_lora(self):
- generator = torch.Generator("cpu").manual_seed(0)
-
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
- pipe.enable_model_cpu_offload()
- lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
- lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- images = images[0, -3:, -3:, -1].flatten()
- expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-4))
- release_memory(pipe)
-
- def test_sdxl_lcm_lora(self):
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
- pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
- pipe.enable_model_cpu_offload()
-
- generator = torch.Generator("cpu").manual_seed(0)
-
- lora_model_id = "latent-consistency/lcm-lora-sdxl"
-
- pipe.load_lora_weights(lora_model_id)
-
- image = pipe(
- "masterpiece, best quality, mountain", generator=generator, num_inference_steps=4, guidance_scale=0.5
- ).images[0]
-
- expected_image = load_image(
- "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/lcm_lora/sdxl_lcm_lora.png"
- )
-
- image_np = pipe.image_processor.pil_to_numpy(image)
- expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)
-
- max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
- assert max_diff < 1e-4
-
- pipe.unload_lora_weights()
-
- release_memory(pipe)
-
- def test_sdv1_5_lcm_lora(self):
- pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
- pipe.to("cuda")
- pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-
- generator = torch.Generator("cpu").manual_seed(0)
-
- lora_model_id = "latent-consistency/lcm-lora-sdv1-5"
- pipe.load_lora_weights(lora_model_id)
-
- image = pipe(
- "masterpiece, best quality, mountain", generator=generator, num_inference_steps=4, guidance_scale=0.5
- ).images[0]
-
- expected_image = load_image(
- "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/lcm_lora/sdv15_lcm_lora.png"
- )
-
- image_np = pipe.image_processor.pil_to_numpy(image)
- expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)
-
- max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
- assert max_diff < 1e-4
-
- pipe.unload_lora_weights()
-
- release_memory(pipe)
-
- def test_sdv1_5_lcm_lora_img2img(self):
- pipe = AutoPipelineForImage2Image.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
- pipe.to("cuda")
- pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-
- init_image = load_image(
- "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/img2img/fantasy_landscape.png"
- )
-
- generator = torch.Generator("cpu").manual_seed(0)
-
- lora_model_id = "latent-consistency/lcm-lora-sdv1-5"
- pipe.load_lora_weights(lora_model_id)
-
- image = pipe(
- "snowy mountain",
- generator=generator,
- image=init_image,
- strength=0.5,
- num_inference_steps=4,
- guidance_scale=0.5,
- ).images[0]
-
- expected_image = load_image(
- "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/lcm_lora/sdv15_lcm_lora_img2img.png"
- )
-
- image_np = pipe.image_processor.pil_to_numpy(image)
- expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)
-
- max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
- assert max_diff < 1e-4
-
- pipe.unload_lora_weights()
-
- release_memory(pipe)
-
- def test_sdxl_1_0_lora_fusion(self):
- generator = torch.Generator().manual_seed(0)
-
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
- lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
- lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
- pipe.fuse_lora()
- # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being
- # silently deleted - otherwise this will CPU OOM
- pipe.unload_lora_weights()
-
- pipe.enable_model_cpu_offload()
-
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- images = images[0, -3:, -3:, -1].flatten()
- # This way we also test equivalence between LoRA fusion and the non-fusion behaviour.
- expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-4))
- release_memory(pipe)
-
- def test_sdxl_1_0_lora_unfusion(self):
- generator = torch.Generator("cpu").manual_seed(0)
-
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
- lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
- lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
- pipe.fuse_lora()
-
- pipe.enable_model_cpu_offload()
-
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=3
- ).images
- images_with_fusion = images.flatten()
-
- pipe.unfuse_lora()
- generator = torch.Generator("cpu").manual_seed(0)
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=3
- ).images
- images_without_fusion = images.flatten()
-
- max_diff = numpy_cosine_similarity_distance(images_with_fusion, images_without_fusion)
- assert max_diff < 1e-4
-
- release_memory(pipe)
-
- def test_sdxl_1_0_lora_unfusion_effectivity(self):
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
- pipe.enable_model_cpu_offload()
-
- generator = torch.Generator().manual_seed(0)
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
- original_image_slice = images[0, -3:, -3:, -1].flatten()
-
- lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
- lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
- pipe.fuse_lora()
-
- generator = torch.Generator().manual_seed(0)
- _ = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- pipe.unfuse_lora()
-
- # We need to unload the lora weights - in the old API unfuse led to unloading the adapter weights
- pipe.unload_lora_weights()
-
- generator = torch.Generator().manual_seed(0)
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
- images_without_fusion_slice = images[0, -3:, -3:, -1].flatten()
-
- self.assertTrue(np.allclose(original_image_slice, images_without_fusion_slice, atol=1e-3))
- release_memory(pipe)
-
- def test_sdxl_1_0_lora_fusion_efficiency(self):
- generator = torch.Generator().manual_seed(0)
- lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
- lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
-
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
- pipe.enable_model_cpu_offload()
-
- start_time = time.time()
- for _ in range(3):
- pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
- end_time = time.time()
- elapsed_time_non_fusion = end_time - start_time
-
- del pipe
-
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
- pipe.fuse_lora()
-
- # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being
- # silently deleted - otherwise this will CPU OOM
- pipe.unload_lora_weights()
- pipe.enable_model_cpu_offload()
-
- generator = torch.Generator().manual_seed(0)
- start_time = time.time()
- for _ in range(3):
- pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
- end_time = time.time()
- elapsed_time_fusion = end_time - start_time
-
- self.assertTrue(elapsed_time_fusion < elapsed_time_non_fusion)
- release_memory(pipe)
-
- def test_sdxl_1_0_last_ben(self):
- generator = torch.Generator().manual_seed(0)
-
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
- pipe.enable_model_cpu_offload()
- lora_model_id = "TheLastBen/Papercut_SDXL"
- lora_filename = "papercut.safetensors"
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
- images = pipe("papercut.safetensors", output_type="np", generator=generator, num_inference_steps=2).images
-
- images = images[0, -3:, -3:, -1].flatten()
- expected = np.array([0.5244, 0.4347, 0.4312, 0.4246, 0.4398, 0.4409, 0.4884, 0.4938, 0.4094])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-3))
- release_memory(pipe)
-
- def test_sdxl_1_0_fuse_unfuse_all(self):
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
- text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
- text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
- unet_sd = copy.deepcopy(pipe.unet.state_dict())
-
- pipe.load_lora_weights(
- "davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.float16
- )
-
- fused_te_state_dict = pipe.text_encoder.state_dict()
- fused_te_2_state_dict = pipe.text_encoder_2.state_dict()
- unet_state_dict = pipe.unet.state_dict()
-
- peft_ge_070 = version.parse(importlib.metadata.version("peft")) >= version.parse("0.7.0")
-
- def remap_key(key, sd):
- # some keys have moved around for PEFT >= 0.7.0, but they should still be loaded correctly
- if (key in sd) or (not peft_ge_070):
- return key
-
- # instead of linear.weight, we now have linear.base_layer.weight, etc.
- if key.endswith(".weight"):
- key = key[:-7] + ".base_layer.weight"
- elif key.endswith(".bias"):
- key = key[:-5] + ".base_layer.bias"
- return key
-
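-        # Illustrative example (hypothetical key): with PEFT >= 0.7.0, a base weight such as
-        # "text_model.encoder.layers.0.self_attn.q_proj.weight" is stored under
-        # "text_model.encoder.layers.0.self_attn.q_proj.base_layer.weight" once a LoRA adapter is attached.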
- for key, value in text_encoder_1_sd.items():
- key = remap_key(key, fused_te_state_dict)
- self.assertTrue(torch.allclose(fused_te_state_dict[key], value))
-
- for key, value in text_encoder_2_sd.items():
- key = remap_key(key, fused_te_2_state_dict)
- self.assertTrue(torch.allclose(fused_te_2_state_dict[key], value))
-
-        for key, value in unet_sd.items():
-            key = remap_key(key, unet_state_dict)
-            self.assertTrue(torch.allclose(unet_state_dict[key], value))
-
- pipe.fuse_lora()
- pipe.unload_lora_weights()
-
- assert not state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict())
- assert not state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict())
- assert not state_dicts_almost_equal(unet_sd, pipe.unet.state_dict())
- release_memory(pipe)
- del unet_sd, text_encoder_1_sd, text_encoder_2_sd
-
- def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self):
- generator = torch.Generator().manual_seed(0)
-
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
- pipe.enable_sequential_cpu_offload()
- lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
- lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
-
- pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
-
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
-
- images = images[0, -3:, -3:, -1].flatten()
- expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-3))
- release_memory(pipe)
-
- def test_sd_load_civitai_empty_network_alpha(self):
- """
- This test simply checks that loading a LoRA with an empty network alpha works fine
- See: https://github.com/huggingface/diffusers/issues/5606
- """
- pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to("cuda")
- pipeline.enable_sequential_cpu_offload()
- civitai_path = hf_hub_download("ybelkada/test-ahi-civitai", "ahi_lora_weights.safetensors")
- pipeline.load_lora_weights(civitai_path, adapter_name="ahri")
-
- images = pipeline(
- "ahri, masterpiece, league of legends",
- output_type="np",
- generator=torch.manual_seed(156),
- num_inference_steps=5,
- ).images
- images = images[0, -3:, -3:, -1].flatten()
- expected = np.array([0.0, 0.0, 0.0, 0.002557, 0.020954, 0.001792, 0.006581, 0.00591, 0.002995])
-
- self.assertTrue(np.allclose(images, expected, atol=1e-3))
- release_memory(pipeline)
-
- def test_controlnet_canny_lora(self):
- controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0")
-
- pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
- "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet
- )
- pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors")
- pipe.enable_sequential_cpu_offload()
-
- generator = torch.Generator(device="cpu").manual_seed(0)
- prompt = "corgi"
- image = load_image(
- "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
- )
-
- images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
-
- assert images[0].shape == (768, 512, 3)
-
- original_image = images[0, -3:, -3:, -1].flatten()
- expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333])
- assert np.allclose(original_image, expected_image, atol=1e-04)
- release_memory(pipe)
-
- def test_sdxl_t2i_adapter_canny_lora(self):
- adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16).to(
- "cpu"
- )
- pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
- "stabilityai/stable-diffusion-xl-base-1.0",
- adapter=adapter,
- torch_dtype=torch.float16,
- variant="fp16",
- )
- pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors")
- pipe.enable_model_cpu_offload()
- pipe.set_progress_bar_config(disable=None)
-
- generator = torch.Generator(device="cpu").manual_seed(0)
- prompt = "toy"
- image = load_image(
- "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/toy_canny.png"
- )
-
- images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
-
- assert images[0].shape == (768, 512, 3)
-
- image_slice = images[0, -3:, -3:, -1].flatten()
- expected_slice = np.array([0.4284, 0.4337, 0.4319, 0.4255, 0.4329, 0.4280, 0.4338, 0.4420, 0.4226])
- assert numpy_cosine_similarity_distance(image_slice, expected_slice) < 1e-4
-
- @nightly
- def test_sequential_fuse_unfuse(self):
- pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
-
- # 1. round
- pipe.load_lora_weights("Pclanglais/TintinIA", torch_dtype=torch.float16)
- pipe.to("cuda")
- pipe.fuse_lora()
-
- generator = torch.Generator().manual_seed(0)
- images = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
- image_slice = images[0, -3:, -3:, -1].flatten()
-
- pipe.unfuse_lora()
-
- # 2. round
- pipe.load_lora_weights("ProomptEngineer/pe-balloon-diffusion-style", torch_dtype=torch.float16)
- pipe.fuse_lora()
- pipe.unfuse_lora()
-
- # 3. round
- pipe.load_lora_weights("ostris/crayon_style_lora_sdxl", torch_dtype=torch.float16)
- pipe.fuse_lora()
- pipe.unfuse_lora()
-
- # 4. back to 1st round
- pipe.load_lora_weights("Pclanglais/TintinIA", torch_dtype=torch.float16)
- pipe.fuse_lora()
-
- generator = torch.Generator().manual_seed(0)
- images_2 = pipe(
- "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
- ).images
- image_slice_2 = images_2[0, -3:, -3:, -1].flatten()
-
- self.assertTrue(np.allclose(image_slice, image_slice_2, atol=1e-3))
- release_memory(pipe)
diff --git a/tests/lora/utils.py b/tests/lora/utils.py
index da0da0ad8ae4..afebddc60d21 100644
--- a/tests/lora/utils.py
+++ b/tests/lora/utils.py
@@ -15,6 +15,7 @@
import os
import tempfile
import unittest
+from itertools import product
import numpy as np
import torch
@@ -756,6 +757,218 @@ def test_simple_inference_with_text_unet_multi_adapter(self):
"output with no lora and output with lora disabled should give same results",
)
+ def test_simple_inference_with_text_unet_block_scale(self):
+ """
+ Tests a simple inference with lora attached to the text encoder and unet, attaches
+ one adapter and sets different weights for different blocks (i.e. block lora)
+ """
+ for scheduler_cls in [DDIMScheduler, LCMScheduler]:
+ components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
+ pipe = self.pipeline_class(**components)
+ pipe = pipe.to(torch_device)
+ pipe.set_progress_bar_config(disable=None)
+ _, _, inputs = self.get_dummy_inputs(with_generator=False)
+
+ output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
+ pipe.unet.add_adapter(unet_lora_config, "adapter-1")
+
+ self.assertTrue(
+ check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
+ )
+ self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
+
+ if self.has_two_text_encoders:
+ pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
+ self.assertTrue(
+ check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
+ )
+
+ weights_1 = { "unet": { "down": 5 } }
+ pipe.set_adapters("adapter-1", weights_1)
+ output_weights_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ weights_2 = { "unet": { "up": 5 } }
+ pipe.set_adapters("adapter-1", weights_2)
+ output_weights_2 = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ self.assertFalse(
+ np.allclose(output_weights_1, output_weights_2, atol=1e-3, rtol=1e-3),
+ "LoRA weights 1 and 2 should give different results",
+ )
+ self.assertFalse(
+ np.allclose(output_no_lora, output_weights_1, atol=1e-3, rtol=1e-3),
+ "No adapter and LoRA weights 1 should give different results",
+ )
+ self.assertFalse(
+ np.allclose(output_no_lora, output_weights_2, atol=1e-3, rtol=1e-3),
+ "No adapter and LoRA weights 2 should give different results",
+ )
+
+ pipe.disable_lora()
+ output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ self.assertTrue(
+ np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3),
+ "output with no lora and output with lora disabled should give same results",
+ )
+
+ def test_simple_inference_with_text_unet_multi_adapter_block_lora(self):
+ """
+ Tests a simple inference with lora attached to the text encoder and unet, attaches
+ multiple adapters and sets different weights for different blocks (i.e. block lora)
+ """
+ for scheduler_cls in [DDIMScheduler, LCMScheduler]:
+ components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
+ pipe = self.pipeline_class(**components)
+ pipe = pipe.to(torch_device)
+ pipe.set_progress_bar_config(disable=None)
+ _, _, inputs = self.get_dummy_inputs(with_generator=False)
+
+ output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
+ pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
+
+ pipe.unet.add_adapter(unet_lora_config, "adapter-1")
+ pipe.unet.add_adapter(unet_lora_config, "adapter-2")
+
+ self.assertTrue(
+ check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
+ )
+ self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
+
+ if self.has_two_text_encoders:
+ pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
+ pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
+ self.assertTrue(
+ check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
+ )
+
+ scales_1 = {"unet": {"down": 5}}
+ scales_2 = {"unet": {"down": 5, "mid": 5}}
+ pipe.set_adapters("adapter-1", scales_1)
+
+ output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ pipe.set_adapters("adapter-2", scales_2)
+ output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ pipe.set_adapters(["adapter-1", "adapter-2"], [scales_1, scales_2])
+
+ output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ # Fuse and unfuse should lead to the same results
+ self.assertFalse(
+ np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3),
+ "Adapter 1 and 2 should give different results",
+ )
+
+ self.assertFalse(
+ np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3),
+ "Adapter 1 and mixed adapters should give different results",
+ )
+
+ self.assertFalse(
+ np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3),
+ "Adapter 2 and mixed adapters should give different results",
+ )
+
+ pipe.disable_lora()
+
+ output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images
+
+ self.assertTrue(
+ np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3),
+ "output with no lora and output with lora disabled should give same results",
+ )
+
+ def test_simple_inference_with_text_unet_block_scale_for_all_dict_options(self):
+ """Tests that any valid combination of lora block scales can be used in pipe.set_adapters"""
+
+ def updown_options(blocks_with_tf, layers_per_block, value):
+ """
+ Generate every possible combination for how a lora weight dict for the up/down part can be.
+ E.g. 2, {"block_1": 2}, {"block_1": [2,2,2]}, {"block_1": 2, "block_2": [2,2,2]}, ...
+ """
+ num_val = value
+ list_val = [value] * layers_per_block
+
+ node_opts = [None, num_val, list_val]
+ node_opts_foreach_block = [node_opts] * len(blocks_with_tf)
+
+ updown_opts = [num_val]
+ for nodes in product(*node_opts_foreach_block):
+ if all(n is None for n in nodes):
+ continue
+ opt = {}
+ for b, n in zip(blocks_with_tf, nodes):
+ if n is not None:
+ opt["block_" + str(b)] = n
+ updown_opts.append(opt)
+ return updown_opts
+
+ def all_possible_dict_opts(unet, value):
+ """
+ Generate every possible combination for how a lora weight dict can be.
+ E.g. 2, {"unet": {"down": 2}}, {"unet": {"down": [2,2,2]}}, {"unet": {"mid": 2, "up": [2,2,2]}}, ...
+ """
+
+ down_blocks_with_tf = [i for i, d in enumerate(unet.down_blocks) if hasattr(d, "attentions")]
+ up_blocks_with_tf = [i for i, u in enumerate(unet.up_blocks) if hasattr(u, "attentions")]
+
+ layers_per_block = unet.config.layers_per_block
+
+ text_encoder_opts = [None, value]
+ text_encoder_2_opts = [None, value]
+ mid_opts = [None, value]
+ down_opts = [None] + updown_options(down_blocks_with_tf, layers_per_block, value)
+ up_opts = [None] + updown_options(up_blocks_with_tf, layers_per_block + 1, value)
+
+ opts = []
+
+ for t1, t2, d, m, u in product(text_encoder_opts, text_encoder_2_opts, down_opts, mid_opts, up_opts):
+ if all(o is None for o in (t1, t2, d, m, u)):
+ continue
+ opt = {}
+ if t1 is not None:
+ opt["text_encoder"] = t1
+ if t2 is not None:
+ opt["text_encoder_2"] = t2
+ if all(o is None for o in (d, m, u)):
+ # no unet scaling
+ continue
+ opt["unet"] = {}
+ if d is not None:
+ opt["unet"]["down"] = d
+ if m is not None:
+ opt["unet"]["mid"] = m
+ if u is not None:
+ opt["unet"]["up"] = u
+ opts.append(opt)
+
+ return opts
+
+ components, text_lora_config, unet_lora_config = self.get_dummy_components(self.scheduler_cls)
+ pipe = self.pipeline_class(**components)
+ pipe = pipe.to(torch_device)
+ pipe.set_progress_bar_config(disable=None)
+ _, _, inputs = self.get_dummy_inputs(with_generator=False)
+
+ pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
+ pipe.unet.add_adapter(unet_lora_config, "adapter-1")
+
+ if self.has_two_text_encoders:
+ pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
+
+ for scale_dict in all_possible_dict_opts(pipe.unet, value=1234):
+ # test if lora block scales can be set with this scale_dict
+ if not self.has_two_text_encoders and "text_encoder_2" in scale_dict:
+ del scale_dict["text_encoder_2"]
+
+ pipe.set_adapters("adapter-1", scale_dict)
+
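To make the enumeration above concrete, here is a small standalone sketch (not part of the patch; block names and values are illustrative) of how `itertools.product` builds the per-block options that `updown_options` iterates over:

```python
from itertools import product

# Each block can be omitted (None), given one scale, or given a per-transformer list of scales.
node_opts = [None, 2, [2, 2]]
blocks_with_tf = ["block_1", "block_2"]

updown_opts = []
for nodes in product(node_opts, repeat=len(blocks_with_tf)):
    if all(n is None for n in nodes):
        continue  # at least one block must be specified
    updown_opts.append({b: n for b, n in zip(blocks_with_tf, nodes) if n is not None})

print(len(updown_opts))  # 8: 3 options per block ** 2 blocks, minus the all-None case
print(updown_opts[:3])   # [{'block_2': 2}, {'block_2': [2, 2]}, {'block_1': 2}]
```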
def test_simple_inference_with_text_unet_multi_adapter_delete_adapter(self):
"""
Tests a simple inference with lora attached to text encoder and unet, attaches
From 624b2dd78e5a5de4e4b1a035ff2ebd040a4aa8b4 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Thu, 21 Mar 2024 11:47:25 +0100
Subject: [PATCH 12/25] Final cleanup
---
.../en/using-diffusers/loading_adapters.md | 10 +++
src/diffusers/loaders/lora.py | 22 +++---
src/diffusers/loaders/unet.py | 67 +++++++++++--------
tests/lora/utils.py | 16 ++---
4 files changed, 68 insertions(+), 47 deletions(-)
diff --git a/docs/source/en/using-diffusers/loading_adapters.md b/docs/source/en/using-diffusers/loading_adapters.md
index 583e405e54f0..4c57b59e0717 100644
--- a/docs/source/en/using-diffusers/loading_adapters.md
+++ b/docs/source/en/using-diffusers/loading_adapters.md
@@ -182,6 +182,16 @@ scales = {
pipe.set_adapters("my_adapter", scales)
```
+This also works with multiple adapters:
+```python
+pipe = ... # create pipeline
+pipe.load_lora_weights(..., adapter_name="my_adapter_1")
+pipe.load_lora_weights(..., adapter_name="my_adapter_2")
+scales_1 = { ... }
+scales_2 = { ... }
+pipe.set_adapters(["my_adapter_1", "my_adapter_2"], [scales_1, scales_2])
+```
+
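For concreteness, a filled-in version of the placeholder snippet above could look like the following; the checkpoint names, adapter names, and scale values are purely illustrative and not part of the patch:

```python
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Two hypothetical LoRAs loaded under distinct adapter names.
pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")

# Per-adapter block scales: emphasize "toy" in the down blocks and "pixel" in the up blocks.
scales_1 = {"text_encoder": 0.8, "unet": {"down": 0.9, "mid": 0.6, "up": 0.3}}
scales_2 = {"unet": {"down": 0.3, "mid": 0.6, "up": 0.9}}
pipe.set_adapters(["toy", "pixel"], [scales_1, scales_2])
```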
### Kohya and TheLastBen
Other popular LoRA trainers from the community include those by [Kohya](https://github.com/kohya-ss/sd-scripts/) and [TheLastBen](https://github.com/TheLastBen/fast-stable-diffusion). These trainers create different LoRA checkpoints than those trained by 🤗 Diffusers, but they can still be loaded in the same way.
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index 2e5e2ab5b7f6..881f34e53ffe 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -979,6 +979,8 @@ def set_adapters_for_text_encoder(
raise ValueError("PEFT backend is required for this method.")
def process_weights(adapter_names, weights):
+ # Expand weights into a list, one entry per adapter
+ # e.g. for 2 adapters: 7 -> [7,7] ; [3, None] -> [3, None]
if not isinstance(weights, list):
weights = [weights] * len(adapter_names)
@@ -987,7 +989,11 @@ def process_weights(adapter_names, weights):
f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(weights)}"
)
- weights = [w if w is not None else 1.0 for w in weights] # Set None values to default of 1.0
+ # Set None values to default of 1.0
+ # e.g. [7,7] -> [7,7] ; [3, None] -> [3,1]
+ weights = [w if w is not None else 1.0 for w in weights]
+
+ # e.g. [3,1] -> [{"text_model": 3} , {"text_model": 1} ]
weights = [{"text_model": w} for w in weights]
return weights
@@ -1053,7 +1059,7 @@ def set_adapters(
)
# Decompose weights into weights for unet, text_encoder and text_encoder_2
- unet_weights, text_encoder_weights, text_encoder_2_weights = [], [], []
+ unet_lora_weights, text_encoder_lora_weights, text_encoder_2_lora_weights = [], [], []
for adapter_name, weights in zip(adapter_names, adapter_weights):
if isinstance(weights, dict):
@@ -1075,19 +1081,19 @@ def set_adapters(
text_encoder_weight = weights
text_encoder_2_weight = weights
- unet_weights.append(unet_weight)
- text_encoder_weights.append(text_encoder_weight)
- text_encoder_2_weights.append(text_encoder_2_weight)
+ unet_lora_weights.append(unet_weight)
+ text_encoder_lora_weights.append(text_encoder_weight)
+ text_encoder_2_lora_weights.append(text_encoder_2_weight)
unet = getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet
# Handle the UNET
- unet.set_adapters(adapter_names, unet_weights)
+ unet.set_adapters(adapter_names, unet_lora_weights)
# Handle the Text Encoder
if hasattr(self, "text_encoder"):
- self.set_adapters_for_text_encoder(adapter_names, self.text_encoder, text_encoder_weights)
+ self.set_adapters_for_text_encoder(adapter_names, self.text_encoder, text_encoder_lora_weights)
if hasattr(self, "text_encoder_2"):
- self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2, text_encoder_2_weights)
+ self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2, text_encoder_2_lora_weights)
def disable_lora(self):
if not USE_PEFT_BACKEND:
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index 241fa26b857f..ac45d72338d7 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -581,36 +581,40 @@ def _expand_lora_scales_dict(
Dict with keys 'up' and 'down', showing how many transformer layers each block has
E.g. turns
- scales = {
- 'down': 2,
- 'mid': 3,
- 'up': {
- 'block_0': 4,
- 'block_1': [5, 6, 7]
- }
- }
- blocks_with_transformer = {
- 'down': [1,2],
- 'up': [0,1]
- }
- transformer_per_block = {
- 'down': 2,
- 'up': 3
+ ```python
+ scales = {
+ 'down': 2,
+ 'mid': 3,
+ 'up': {
+ 'block_0': 4,
+ 'block_1': [5, 6, 7]
}
+ }
+ blocks_with_transformer = {
+ 'down': [1,2],
+ 'up': [0,1]
+ }
+ transformer_per_block = {
+ 'down': 2,
+ 'up': 3
+ }
+ ```
into
- {
- 'down.block_1.0': 2,
- 'down.block_1.1': 2,
- 'down.block_2.0': 2,
- 'down.block_2.1': 2,
- 'mid': 3,
- 'up.block_0.0': 4,
- 'up.block_0.1': 4,
- 'up.block_0.2': 4,
- 'up.block_1.0': 5,
- 'up.block_1.1': 6,
- 'up.block_1.2': 7,
- }
+ ```python
+ {
+ 'down.block_1.0': 2,
+ 'down.block_1.1': 2,
+ 'down.block_2.0': 2,
+ 'down.block_2.1': 2,
+ 'mid': 3,
+ 'up.block_0.0': 4,
+ 'up.block_0.1': 4,
+ 'up.block_0.2': 4,
+ 'up.block_1.0': 5,
+ 'up.block_1.1': 6,
+ 'up.block_1.2': 7,
+ }
+ ```
"""
if sorted(blocks_with_transformer.keys()) != ["down", "up"]:
raise ValueError("blocks_with_transformer needs to be a dict with keys `'down' and `'up'`")
@@ -707,6 +711,7 @@ def set_adapters(
adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
# Expand weights into a list, one entry per adapter
+ # examples for e.g. 2 adapters: [7,7] -> [7,7] ; None -> [None, None]
if not isinstance(weights, list):
weights = [weights] * len(adapter_names)
@@ -715,13 +720,17 @@ def set_adapters(
f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}."
)
- weights = [w if w is not None else 1.0 for w in weights] # Set None values to default of 1.0
+ # Set None values to default of 1.0
+ # e.g. [7,7] -> [7,7] ; [None, None] -> [1.0, 1.0]
+ weights = [w if w is not None else 1.0 for w in weights]
+
blocks_with_transformer = {
"down": [i for i, block in enumerate(self.down_blocks) if hasattr(block, "attentions")],
"up": [i for i, block in enumerate(self.up_blocks) if hasattr(block, "attentions")],
}
transformer_per_block = {"down": self.config.layers_per_block, "up": self.config.layers_per_block + 1}
+ # e.g. [7,7] -> [{...}, {...}]
weights = [
self._expand_lora_scales_dict(weight_for_adapter, blocks_with_transformer, transformer_per_block)
for weight_for_adapter in weights
diff --git a/tests/lora/utils.py b/tests/lora/utils.py
index afebddc60d21..9f3c46538d0b 100644
--- a/tests/lora/utils.py
+++ b/tests/lora/utils.py
@@ -774,9 +774,7 @@ def test_simple_inference_with_text_unet_block_scale(self):
pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
pipe.unet.add_adapter(unet_lora_config, "adapter-1")
- self.assertTrue(
- check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
+ self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders:
@@ -785,11 +783,11 @@ def test_simple_inference_with_text_unet_block_scale(self):
check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
)
- weights_1 = { "unet": { "down": 5 } }
+ weights_1 = {"text_encoder": 2, "unet": {"down": 5}}
pipe.set_adapters("adapter-1", weights_1)
output_weights_1 = pipe(**inputs, generator=torch.manual_seed(0)).images
- weights_2 = { "unet": { "up": 5 } }
+ weights_2 = {"unet": {"up": 5}}
pipe.set_adapters("adapter-1", weights_2)
output_weights_2 = pipe(**inputs, generator=torch.manual_seed(0)).images
@@ -834,9 +832,7 @@ def test_simple_inference_with_text_unet_multi_adapter_block_lora(self):
pipe.unet.add_adapter(unet_lora_config, "adapter-1")
pipe.unet.add_adapter(unet_lora_config, "adapter-2")
- self.assertTrue(
- check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
- )
+ self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders:
@@ -846,7 +842,7 @@ def test_simple_inference_with_text_unet_multi_adapter_block_lora(self):
check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
)
- scales_1 = {"unet": {"down": 5}}
+ scales_1 = {"text_encoder": 2, "unet": {"down": 5}}
scales_2 = {"unet": {"down": 5, "mid": 5}}
pipe.set_adapters("adapter-1", scales_1)
@@ -967,7 +963,7 @@ def all_possible_dict_opts(unet, value):
if not self.has_two_text_encoders and "text_encoder_2" in scale_dict:
del scale_dict["text_encoder_2"]
- pipe.set_adapters("adapter-1", scale_dict)
+ pipe.set_adapters("adapter-1", scale_dict) # test will fail if this line throws an error
def test_simple_inference_with_text_unet_multi_adapter_delete_adapter(self):
"""
From 0b32d64be7a007e6aab36f4692d1212a0e23121c Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Thu, 21 Mar 2024 15:11:18 +0100
Subject: [PATCH 13/25] Some more final fixes
- Included examples in `using_peft_for_inference.md`
- Added hint that only attns are scaled
- Removed NoneTypes
- Added test to check mismatching lens of adapter names / weights raise error
---
.../en/tutorials/using_peft_for_inference.md | 63 +++++++++++++++++++
.../en/using-diffusers/loading_adapters.md | 6 ++
src/diffusers/loaders/lora.py | 3 +-
src/diffusers/loaders/unet.py | 3 +-
tests/lora/utils.py | 5 ++
5 files changed, 76 insertions(+), 4 deletions(-)
diff --git a/docs/source/en/tutorials/using_peft_for_inference.md b/docs/source/en/tutorials/using_peft_for_inference.md
index 1e12c2a870ac..581c8c077751 100644
--- a/docs/source/en/tutorials/using_peft_for_inference.md
+++ b/docs/source/en/tutorials/using_peft_for_inference.md
@@ -132,6 +132,69 @@ image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).ima
image
```
+### Customize adapters strength
+For even more customization, you can control how strongly the adapter affects each part of the pipeline. For this, pass a dictionary with the control strengths (called "scales") to [`~diffusers.loaders.UNet2DConditionLoadersMixin.set_adapters`].
+
+For example, here's how you can turn on the adapter for the `text_encoder` and `down` parts, but turn it off for the `mid` and `up` parts:
+```python
+pipe.enable_lora() # enable lora again, after we disabled it above
+
+adapter_weight_scales = {
+ "text_encoder": 1,
+ "unet": { "down": 1, "mid": 0, "up": 0}
+}
+pipe.set_adapters("pixel", adapter_weight_scales)
+prompt = "a hacker with a hoodie"
+image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
+image
+```
+
+
+Let's see how turning off the `down` part and turning on the `mid` and `up` part respectively changes the image.
+```python
+adapter_weight_scales = {
+ "text_encoder": 1,
+ "unet": { "down": 0, "mid": 1, "up": 0}
+}
+pipe.set_adapters("pixel", adapter_weight_scales)
+image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
+image
+```
+
+
+```python
+adapter_weight_scales = {
+ "text_encoder": 1,
+ "unet": { "down": 0, "mid": 1, "up": 0}
+}
+pipe.set_adapters("pixel", adapter_weight_scales)
+image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
+image
+```
+
+
+Looks cool!
+
+This is a really powerful feature. You can use it to control the adapter strengths down to per-transformer level. And you can even use it for multiple adapters.
+```python
+adapter_weight_scales_toy = 0.5
+adapter_weight_scales_pixel = {
+ "text_encoder": 0.5,
+ "unet": {
+ "down": 0.9, # all transformers in the down-part will use scale 0.9
+ # "mid" # because, in this example, "mid" is not given, all transformers in the mid part will use the default scale 1.0
+ "up": {
+ "block_0": 0.6, # all 3 transformers in the 0th block in the up-part will use scale 0.6
+ "block_1": [0.4, 0.8, 1.0], # the 3 transformers in the 1st block in the up-part will use scales 0.4, 0.8 and 1.0 respectively
+ }
+ }
+}
+pipe.set_adapters(["toy", "pixel"], [adapter_weight_scales_toy, adapter_weight_scales_pixel]])
+image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
+image
+```
+
+
## Manage active adapters
You have attached multiple adapters in this tutorial, and if you're feeling a bit lost on what adapters have been attached to the pipeline's components, use the [`~diffusers.loaders.LoraLoaderMixin.get_active_adapters`] method to check the list of active adapters:
diff --git a/docs/source/en/using-diffusers/loading_adapters.md b/docs/source/en/using-diffusers/loading_adapters.md
index 4c57b59e0717..e2ef7e80fa43 100644
--- a/docs/source/en/using-diffusers/loading_adapters.md
+++ b/docs/source/en/using-diffusers/loading_adapters.md
@@ -192,6 +192,12 @@ scales_2 = { ... }
pipe.set_adapters(["my_adapter_1", "my_adapter_2"], [scales_1, scales_2])
```
+
+
+Currently, [`~loaders.LoraLoaderMixin.set_adapters`] only supports scaling attention weights. If a LoRA has other parts (e.g., resnets or down-/upsamplers), they will keep a scale of 1.0.
+
+
+
### Kohya and TheLastBen
Other popular LoRA trainers from the community include those by [Kohya](https://github.com/kohya-ss/sd-scripts/) and [TheLastBen](https://github.com/TheLastBen/fast-stable-diffusion). These trainers create different LoRA checkpoints than those trained by 🤗 Diffusers, but they can still be loaded in the same way.
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index 881f34e53ffe..e1def27e42b9 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -15,7 +15,6 @@
import inspect
import os
from pathlib import Path
-from types import NoneType
from typing import Callable, Dict, List, Optional, Union
import safetensors
@@ -961,7 +960,7 @@ def set_adapters_for_text_encoder(
self,
adapter_names: Union[List[str], str],
text_encoder: Optional["PreTrainedModel"] = None, # noqa: F821
- text_encoder_weights: Optional[Union[float, List[float], List[NoneType]]] = None,
+ text_encoder_weights: Optional[Union[float, List[float], List[None]]] = None,
):
"""
Sets the adapter layers for the text encoder.
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index ac45d72338d7..6cad6437a24a 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -18,7 +18,6 @@
from contextlib import nullcontext
from functools import partial
from pathlib import Path
-from types import NoneType
from typing import Callable, Dict, List, Optional, Union
import safetensors
@@ -677,7 +676,7 @@ def layer_name(name):
def set_adapters(
self,
adapter_names: Union[List[str], str],
- weights: Optional[Union[float, Dict, List[float], List[Dict], List[NoneType]]] = None,
+ weights: Optional[Union[float, Dict, List[float], List[Dict], List[None]]] = None,
):
"""
Set the currently active adapters for use in the UNet.
diff --git a/tests/lora/utils.py b/tests/lora/utils.py
index 9f3c46538d0b..e2e3ca44e130 100644
--- a/tests/lora/utils.py
+++ b/tests/lora/utils.py
@@ -880,6 +880,11 @@ def test_simple_inference_with_text_unet_multi_adapter_block_lora(self):
"output with no lora and output with lora disabled should give same results",
)
+ # a mismatching number of adapter_names and adapter_weights should raise an error
+ with self.assertRaises(ValueError):
+ pipe.set_adapters(["adapter-1", "adapter-2"], [scales_1])
+
+
def test_simple_inference_with_text_unet_block_scale_for_all_dict_options(self):
"""Tests that any valid combination of lora block scales can be used in pipe.set_adapter"""
From 38038b75e8657dba3bccc3ff24a0804404f593f9 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Thu, 21 Mar 2024 22:54:46 +0100
Subject: [PATCH 14/25] Update using_peft_for_inference.md
---
docs/source/en/tutorials/using_peft_for_inference.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/source/en/tutorials/using_peft_for_inference.md b/docs/source/en/tutorials/using_peft_for_inference.md
index 581c8c077751..8236f7990d7a 100644
--- a/docs/source/en/tutorials/using_peft_for_inference.md
+++ b/docs/source/en/tutorials/using_peft_for_inference.md
@@ -138,13 +138,13 @@ For even more customization, you can control how strongly the adapter affects ea
For example, here's how you can turn on the adapter for the `text_encoder` and `down` parts, but turn it off for the `mid` and `up` parts:
```python
pipe.enable_lora() # enable lora again, after we disabled it above
+prompt = "toy_face of a hacker with a hoodie, pixel art"
adapter_weight_scales = {
"text_encoder": 1,
"unet": { "down": 1, "mid": 0, "up": 0}
}
pipe.set_adapters("pixel", adapter_weight_scales)
-prompt = "a hacker with a hoodie"
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
From 3ed3ca57b975e1fd25c8b864f4ed25c325939ae3 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Thu, 21 Mar 2024 23:03:08 +0100
Subject: [PATCH 15/25] Update using_peft_for_inference.md
---
docs/source/en/tutorials/using_peft_for_inference.md | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/docs/source/en/tutorials/using_peft_for_inference.md b/docs/source/en/tutorials/using_peft_for_inference.md
index 028476a5c752..8512b6d8cc81 100644
--- a/docs/source/en/tutorials/using_peft_for_inference.md
+++ b/docs/source/en/tutorials/using_peft_for_inference.md
@@ -130,6 +130,7 @@ prompt = "toy_face of a hacker with a hoodie"
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
+

### Customize adapters strength
@@ -148,6 +149,7 @@ pipe.set_adapters("pixel", adapter_weight_scales)
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
+

Let's see how turning off the `down` part and turning on the `mid` and `up` part respectively changes the image.
@@ -160,6 +162,7 @@ pipe.set_adapters("pixel", adapter_weight_scales)
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
+

```python
@@ -171,6 +174,7 @@ pipe.set_adapters("pixel", adapter_weight_scales)
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
+

Looks cool!
@@ -193,6 +197,7 @@ pipe.set_adapters(["toy", "pixel"], [adapter_weight_scales_toy, adapter_weight_s
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
+

## Manage active adapters
From 24d376fa4c812c0cdf862f9ee49d5d1d12500f87 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Fri, 22 Mar 2024 11:51:09 +0100
Subject: [PATCH 16/25] Make style, quality, fix-copies
---
tests/lora/utils.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/tests/lora/utils.py b/tests/lora/utils.py
index e2e3ca44e130..f3daec194b16 100644
--- a/tests/lora/utils.py
+++ b/tests/lora/utils.py
@@ -884,7 +884,6 @@ def test_simple_inference_with_text_unet_multi_adapter_block_lora(self):
with self.assertRaises(ValueError):
pipe.set_adapters(["adapter-1", "adapter-2"], [scales_1])
-
def test_simple_inference_with_text_unet_block_scale_for_all_dict_options(self):
"""Tests that any valid combination of lora block scales can be used in pipe.set_adapter"""
From 7dfa8e38341f62d4566e577e9b011216755d6c66 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Fri, 22 Mar 2024 14:41:18 +0100
Subject: [PATCH 17/25] Updated tutorial;Warning if scale/adapter mismatch
---
.../en/tutorials/using_peft_for_inference.md | 11 +++++-----
src/diffusers/loaders/lora.py | 21 +++++++++++++++++++
2 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/docs/source/en/tutorials/using_peft_for_inference.md b/docs/source/en/tutorials/using_peft_for_inference.md
index 8512b6d8cc81..d9dd58222e84 100644
--- a/docs/source/en/tutorials/using_peft_for_inference.md
+++ b/docs/source/en/tutorials/using_peft_for_inference.md
@@ -139,13 +139,14 @@ For even more customization, you can control how strongly the adapter affects ea
For example, here's how you can turn on the adapter for the `text_encoder` and `down` parts, but turn it off for the `mid` and `up` parts:
```python
pipe.enable_lora() # enable lora again, after we disabled it above
-prompt = "toy_face of a hacker with a hoodie, pixel art"
+pipe.load_lora_weights("veryVANYA/ps1-graphics-sdxl-v2", weight_name="ps1_style_SDXL_v2.safetensors", adapter_name="ps1") # load a lora that also changes the text_encoder
+prompt = "hacker with a hoodie, ps1 style" # trigger word is "ps1 style"
adapter_weight_scales = {
"text_encoder": 1,
"unet": { "down": 1, "mid": 0, "up": 0}
}
-pipe.set_adapters("pixel", adapter_weight_scales)
+pipe.set_adapters("ps1", adapter_weight_scales)
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
@@ -158,7 +159,7 @@ adapter_weight_scales = {
"text_encoder": 1,
"unet": { "down": 0, "mid": 1, "up": 0}
}
-pipe.set_adapters("pixel", adapter_weight_scales)
+pipe.set_adapters("ps1", adapter_weight_scales)
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
@@ -170,7 +171,7 @@ adapter_weight_scales = {
"text_encoder": 1,
"unet": { "down": 0, "mid": 1, "up": 0}
}
-pipe.set_adapters("pixel", adapter_weight_scales)
+pipe.set_adapters("ps1", adapter_weight_scales)
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
@@ -181,9 +182,9 @@ Looks cool!
This is a really powerful feature. You can use it to control the adapter strengths down to per-transformer level. And you can even use it for multiple adapters.
```python
+prompt = "toy_face of a hacker with a hoodie, pixel art"
adapter_weight_scales_toy = 0.5
adapter_weight_scales_pixel = {
- "text_encoder": 0.5,
"unet": {
"down": 0.9, # all transformers in the down-part will use scale 0.9
# "mid" # because, in this example, "mid" is not given, all transformers in the mid part will use the default scale 1.0
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index e1def27e42b9..912e1f1279c3 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -1057,6 +1057,16 @@ def set_adapters(
f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(adapter_weights)}"
)
+ def warn_if_adapter_misses_part(adapter_name, part_name):
+ adapter_list = self.get_list_adapters() # e.g. {"unet": ["name1"], "text_encoder": ["name1", "name2"]}
+ adapter_parts = [
+ part for part, adapters in adapter_list.items() for adapter in adapters if adapter == adapter_name
+ ]
+ if part_name not in adapter_parts:
+ logger.warning(
+ f"Lora weight dict for adapter '{adapter_name}' contains {part_name}, but this will be ignored because {adapter_name} does not contain weights for {part_name}. Valid parts for {adapter_name} are: {adapter_parts}."
+ )
+
# Decompose weights into weights for unet, text_encoder and text_encoder_2
unet_lora_weights, text_encoder_lora_weights, text_encoder_2_lora_weights = [], [], []
@@ -1075,6 +1085,17 @@ def set_adapters(
logger.warning(
"Lora weight dict contains text_encoder_2 weights but will be ignored because pipeline does not have text_encoder_2."
)
+
+ # warn if adapter doesn't have parts specified by adapter_weights
+ if unet_weight is not None:
+ warn_if_adapter_misses_part(adapter_name, "unet")
+
+ if text_encoder_weight is not None:
+ warn_if_adapter_misses_part(adapter_name, "text_encoder")
+
+ if text_encoder_2_weight is not None:
+ warn_if_adapter_misses_part(adapter_name, "text_encoder_2")
+
else:
unet_weight = weights
text_encoder_weight = weights
From 9c6f61332759dc91fddc52e25618f2c2206d6671 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Sat, 23 Mar 2024 01:10:21 +0100
Subject: [PATCH 18/25] floats are forwarded as-is; changed tutorial scale
---
.../en/tutorials/using_peft_for_inference.md | 26 +--
src/diffusers/loaders/lora.py | 3 -
src/diffusers/loaders/unet.py | 130 +--------------
src/diffusers/loaders/unet_loader_utils.py | 149 ++++++++++++++++++
src/diffusers/utils/peft_utils.py | 6 +-
5 files changed, 166 insertions(+), 148 deletions(-)
create mode 100644 src/diffusers/loaders/unet_loader_utils.py
diff --git a/docs/source/en/tutorials/using_peft_for_inference.md b/docs/source/en/tutorials/using_peft_for_inference.md
index d9dd58222e84..84d16abfa899 100644
--- a/docs/source/en/tutorials/using_peft_for_inference.md
+++ b/docs/source/en/tutorials/using_peft_for_inference.md
@@ -139,14 +139,9 @@ For even more customization, you can control how strongly the adapter affects ea
For example, here's how you can turn on the adapter for the `text_encoder` and `down` parts, but turn it off for the `mid` and `up` parts:
```python
pipe.enable_lora() # enable lora again, after we disabled it above
-pipe.load_lora_weights("veryVANYA/ps1-graphics-sdxl-v2", weight_name="ps1_style_SDXL_v2.safetensors", adapter_name="ps1") # load a lora that also changes the text_encoder
-prompt = "hacker with a hoodie, ps1 style" # trigger word is "ps1 style"
-
-adapter_weight_scales = {
- "text_encoder": 1,
- "unet": { "down": 1, "mid": 0, "up": 0}
-}
-pipe.set_adapters("ps1", adapter_weight_scales)
+prompt = "toy_face of a hacker with a hoodie, pixel art"
+adapter_weight_scales = { "unet": { "down": 1, "mid": 0, "up": 0} }
+pipe.set_adapters("pixel", adapter_weight_scales)
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
@@ -155,11 +150,8 @@ image
Let's see how turning off the `down` part and turning on the `mid` and `up` part respectively changes the image.
```python
-adapter_weight_scales = {
- "text_encoder": 1,
- "unet": { "down": 0, "mid": 1, "up": 0}
-}
-pipe.set_adapters("ps1", adapter_weight_scales)
+adapter_weight_scales = { "unet": { "down": 0, "mid": 1, "up": 0} }
+pipe.set_adapters("pixel", adapter_weight_scales)
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
@@ -167,11 +159,8 @@ image

```python
-adapter_weight_scales = {
- "text_encoder": 1,
- "unet": { "down": 0, "mid": 1, "up": 0}
-}
-pipe.set_adapters("ps1", adapter_weight_scales)
+adapter_weight_scales = { "unet": { "down": 0, "mid": 0, "up": 1} }
+pipe.set_adapters("pixel", adapter_weight_scales)
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
@@ -182,7 +171,6 @@ Looks cool!
This is a really powerful feature. You can use it to control the adapter strengths down to per-transformer level. And you can even use it for multiple adapters.
```python
-prompt = "toy_face of a hacker with a hoodie, pixel art"
adapter_weight_scales_toy = 0.5
adapter_weight_scales_pixel = {
"unet": {
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index 912e1f1279c3..797ba20c0f74 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -992,9 +992,6 @@ def process_weights(adapter_names, weights):
# e.g. [7,7] -> [7,7] ; [3, None] -> [3,1]
weights = [w if w is not None else 1.0 for w in weights]
- # e.g. [3,1] -> [{"text_model": 3} , {"text_model": 1} ]
- weights = [{"text_model": w} for w in weights]
-
return weights
adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index 6cad6437a24a..c0c1f6856400 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-import copy
import inspect
import os
from collections import defaultdict
@@ -48,6 +47,7 @@
infer_stable_cascade_single_file_config,
load_single_file_model_checkpoint,
)
+from .unet_loader_utils import maybe_expand_lora_scales
from .utils import AttnProcsLayers
@@ -562,117 +562,6 @@ def _unfuse_lora_apply(self, module):
if isinstance(module, BaseTunerLayer):
module.unmerge()
- def _expand_lora_scales_dict(
- self,
- scales: Union[float, Dict],
- blocks_with_transformer: Dict[str, int],
- transformer_per_block: Dict[str, int],
- ):
- """
- Expands the inputs into a more granular dictionary. See the example below for more details.
-
- Parameters:
- scales (`Union[float, Dict]`):
- Scales dict to expand.
- blocks_with_transformer (`Dict[str, int]`):
- Dict with keys 'up' and 'down', showing which blocks have transformer layers
- transformer_per_block (`Dict[str, int]`):
- Dict with keys 'up' and 'down', showing how many transformer layers each block has
-
- E.g. turns
- ```python
- scales = {
- 'down': 2,
- 'mid': 3,
- 'up': {
- 'block_0': 4,
- 'block_1': [5, 6, 7]
- }
- }
- blocks_with_transformer = {
- 'down': [1,2],
- 'up': [0,1]
- }
- transformer_per_block = {
- 'down': 2,
- 'up': 3
- }
- ```
- into
- ```python
- {
- 'down.block_1.0': 2,
- 'down.block_1.1': 2,
- 'down.block_2.0': 2,
- 'down.block_2.1': 2,
- 'mid': 3,
- 'up.block_0.0': 4,
- 'up.block_0.1': 4,
- 'up.block_0.2': 4,
- 'up.block_1.0': 5,
- 'up.block_1.1': 6,
- 'up.block_1.2': 7,
- }
- ```
- """
- if sorted(blocks_with_transformer.keys()) != ["down", "up"]:
- raise ValueError("blocks_with_transformer needs to be a dict with keys `'down' and `'up'`")
-
- if sorted(transformer_per_block.keys()) != ["down", "up"]:
- raise ValueError("transformer_per_block needs to be a dict with keys `'down' and `'up'`")
-
- scales = copy.deepcopy(scales)
-
- if not isinstance(scales, dict):
- scales = {o: scales for o in ["down", "mid", "up"]}
-
- if "mid" not in scales:
- scales["mid"] = 1
-
- for updown in ["up", "down"]:
- if updown not in scales:
- scales[updown] = 1
-
- # eg {"down": 1} to {"down": {"block_1": 1, "block_2": 1}}}
- if not isinstance(scales[updown], dict):
- scales[updown] = {f"block_{i}": scales[updown] for i in blocks_with_transformer[updown]}
-
- # eg {"down": "block_1": 1}} to {"down": "block_1": [1, 1]}}
- for i in blocks_with_transformer[updown]:
- block = f"block_{i}"
- if not isinstance(scales[updown][block], list):
- scales[updown][block] = [scales[updown][block] for _ in range(transformer_per_block[updown])]
-
- # eg {"down": "block_1": [1, 1]}} to {"down.block_1.0": 1, "down.block_1.1": 1}
- for i in blocks_with_transformer[updown]:
- block = f"block_{i}"
- for tf_idx, value in enumerate(scales[updown][block]):
- scales[f"{updown}.{block}.{tf_idx}"] = value
-
- del scales[updown]
-
- def layer_name(name):
- """Translate user-friendly name (e.g. 'mid') into actual layer name (e.g. 'mid_block.attentions.0')"""
- if name == "mid":
- return "mid_block.attentions.0"
-
- updown, block, attn = name.split(".")
-
- updown = updown.replace("down", "down_blocks").replace("up", "up_blocks")
- block = block.replace("block_", "")
- attn = "attentions." + attn
-
- return ".".join((updown, block, attn))
-
- state_dict = self.state_dict()
- for layer in scales.keys():
- if not any(layer_name(layer) in module for module in state_dict.keys()):
- raise ValueError(
- f"Can't set lora scale for layer {layer}. It either doesn't exist in this unet or it has no attentions."
- )
-
- return {layer_name(name): weight for name, weight in scales.items()}
-
def set_adapters(
self,
adapter_names: Union[List[str], str],
@@ -710,7 +599,7 @@ def set_adapters(
adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
# Expand weights into a list, one entry per adapter
- # examples for e.g. 2 adapters: [7,7] -> [7,7] ; None -> [None, None]
+ # examples for e.g. 2 adapters: 7 -> [7,7] ; None -> [None, None] ; [{...}, 7] -> [{...}, 7]
if not isinstance(weights, list):
weights = [weights] * len(adapter_names)
@@ -720,20 +609,11 @@ def set_adapters(
)
# Set None values to default of 1.0
- # e.g. [7,7] -> [7,7] ; [None, None] -> [1.0, 1.0]
+ # e.g. [{...}, 7] -> [{...}, 7] ; [None, None] -> [1.0, 1.0]
weights = [w if w is not None else 1.0 for w in weights]
- blocks_with_transformer = {
- "down": [i for i, block in enumerate(self.down_blocks) if hasattr(block, "attentions")],
- "up": [i for i, block in enumerate(self.up_blocks) if hasattr(block, "attentions")],
- }
- transformer_per_block = {"down": self.config.layers_per_block, "up": self.config.layers_per_block + 1}
-
- # e.g. [7,7] -> [{...}, {...}]
- weights = [
- self._expand_lora_scales_dict(weight_for_adapter, blocks_with_transformer, transformer_per_block)
- for weight_for_adapter in weights
- ]
+ # e.g. [{...}, 7] -> [{expanded dict...}, 7]
+ weights = maybe_expand_lora_scales(self, weights)
set_weights_and_activate_adapters(self, adapter_names, weights)
diff --git a/src/diffusers/loaders/unet_loader_utils.py b/src/diffusers/loaders/unet_loader_utils.py
new file mode 100644
index 000000000000..13e65a65bc81
--- /dev/null
+++ b/src/diffusers/loaders/unet_loader_utils.py
@@ -0,0 +1,149 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import copy
+from typing import TYPE_CHECKING, Dict, List, Union
+
+
+if TYPE_CHECKING:
+ # import here to avoid circular imports
+ from ..models import UNet2DConditionModel
+
+def translate_into_actual_layer_name(name):
+ """Translate user-friendly name (e.g. 'mid') into actual layer name (e.g. 'mid_block.attentions.0')"""
+ if name == "mid":
+ return "mid_block.attentions.0"
+
+ updown, block, attn = name.split(".")
+
+ updown = updown.replace("down", "down_blocks").replace("up", "up_blocks")
+ block = block.replace("block_", "")
+ attn = "attentions." + attn
+
+ return ".".join((updown, block, attn))
+
+
+def maybe_expand_lora_scales(unet: 'UNet2DConditionModel', weight_scales: List[Union[float, Dict]]):
+ blocks_with_transformer = {
+ "down": [i for i, block in enumerate(unet.down_blocks) if hasattr(block, "attentions")],
+ "up": [i for i, block in enumerate(unet.up_blocks) if hasattr(block, "attentions")],
+ }
+ transformer_per_block = {"down": unet.config.layers_per_block, "up": unet.config.layers_per_block + 1}
+
+ expanded_weight_scales = [
+ maybe_expand_lora_scales_for_one_adapter(
+ weight_for_adapter, blocks_with_transformer, transformer_per_block, unet.state_dict()
+ )
+ for weight_for_adapter in weight_scales
+ ]
+
+ return expanded_weight_scales
+
+
+def maybe_expand_lora_scales_for_one_adapter(
+ scales: Union[float, Dict],
+ blocks_with_transformer: Dict[str, int],
+ transformer_per_block: Dict[str, int],
+ state_dict: Dict,
+):
+ """
+ Expands the inputs into a more granular dictionary. See the example below for more details.
+
+ Parameters:
+ scales (`Union[float, Dict]`):
+ Scales dict to expand.
+ blocks_with_transformer (`Dict[str, int]`):
+ Dict with keys 'up' and 'down', showing which blocks have transformer layers
+ transformer_per_block (`Dict[str, int]`):
+ Dict with keys 'up' and 'down', showing how many transformer layers each block has
+
+ E.g. turns
+ ```python
+ scales = {
+ 'down': 2,
+ 'mid': 3,
+ 'up': {
+ 'block_0': 4,
+ 'block_1': [5, 6, 7]
+ }
+ }
+ blocks_with_transformer = {
+ 'down': [1,2],
+ 'up': [0,1]
+ }
+ transformer_per_block = {
+ 'down': 2,
+ 'up': 3
+ }
+ ```
+ into
+ ```python
+ {
+ 'down.block_1.0': 2,
+ 'down.block_1.1': 2,
+ 'down.block_2.0': 2,
+ 'down.block_2.1': 2,
+ 'mid': 3,
+ 'up.block_0.0': 4,
+ 'up.block_0.1': 4,
+ 'up.block_0.2': 4,
+ 'up.block_1.0': 5,
+ 'up.block_1.1': 6,
+ 'up.block_1.2': 7,
+ }
+ ```
+ """
+ if sorted(blocks_with_transformer.keys()) != ["down", "up"]:
+ raise ValueError("blocks_with_transformer needs to be a dict with keys `'down'` and `'up'`")
+
+ if sorted(transformer_per_block.keys()) != ["down", "up"]:
+ raise ValueError("transformer_per_block needs to be a dict with keys `'down'` and `'up'`")
+
+ if not isinstance(scales, dict):
+ # don't expand if scales is a single number
+ return scales
+
+ scales = copy.deepcopy(scales)
+
+ if "mid" not in scales:
+ scales["mid"] = 1
+
+ for updown in ["up", "down"]:
+ if updown not in scales:
+ scales[updown] = 1
+
+ # eg {"down": 1} to {"down": {"block_1": 1, "block_2": 1}}}
+ if not isinstance(scales[updown], dict):
+ scales[updown] = {f"block_{i}": scales[updown] for i in blocks_with_transformer[updown]}
+
+ # eg {"down": "block_1": 1}} to {"down": "block_1": [1, 1]}}
+ for i in blocks_with_transformer[updown]:
+ block = f"block_{i}"
+ if not isinstance(scales[updown][block], list):
+ scales[updown][block] = [scales[updown][block] for _ in range(transformer_per_block[updown])]
+
+ # eg {"down": "block_1": [1, 1]}} to {"down.block_1.0": 1, "down.block_1.1": 1}
+ for i in blocks_with_transformer[updown]:
+ block = f"block_{i}"
+ for tf_idx, value in enumerate(scales[updown][block]):
+ scales[f"{updown}.{block}.{tf_idx}"] = value
+
+ del scales[updown]
+
+ for layer in scales.keys():
+ if not any(translate_into_actual_layer_name(layer) in module for module in state_dict.keys()):
+ raise ValueError(
+ f"Can't set lora scale for layer {layer}. It either doesn't exist in this unet or it has no attentions."
+ )
+
+ return {translate_into_actual_layer_name(name): weight for name, weight in scales.items()}
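As a rough, standalone illustration of the expansion implemented above (assuming a source checkout with this patch series applied, so the module path below exists): the fake `state_dict` only needs key names containing the expected layer-name substrings, which is enough for the final validation step to pass.

```python
from diffusers.loaders.unet_loader_utils import maybe_expand_lora_scales_for_one_adapter

scales = {"down": 2, "mid": 3, "up": {"block_0": 4, "block_1": [5, 6, 7]}}
blocks_with_transformer = {"down": [1, 2], "up": [0, 1]}
transformer_per_block = {"down": 2, "up": 3}

# Minimal fake state dict: only the key names matter for the validation step.
fake_state_dict = {"mid_block.attentions.0.weight": None}
for updown, per_block in (("down", 2), ("up", 3)):
    for i in blocks_with_transformer[updown]:
        for j in range(per_block):
            fake_state_dict[f"{updown}_blocks.{i}.attentions.{j}.weight"] = None

expanded = maybe_expand_lora_scales_for_one_adapter(
    scales, blocks_with_transformer, transformer_per_block, fake_state_dict
)
print(expanded["down_blocks.1.attentions.0"])  # 2
print(expanded["up_blocks.1.attentions.2"])    # 7
print(expanded["mid_block.attentions.0"])      # 3
```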
diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py
index e557aeb0aca6..9ed45150dce9 100644
--- a/src/diffusers/utils/peft_utils.py
+++ b/src/diffusers/utils/peft_utils.py
@@ -229,10 +229,14 @@ def set_weights_and_activate_adapters(model, adapter_names, weights):
from peft.tuners.tuners_utils import BaseTunerLayer
def get_module_weight(weight_for_adapter, module_name):
+ if not isinstance(weight_for_adapter, dict):
+ # If weight_for_adapter is a single number, always return it.
+ return weight_for_adapter
+
for layer_name, weight_ in weight_for_adapter.items():
if layer_name in module_name:
return weight_
- raise RuntimeError(f"No LoRA weight found for module {module_name}, which should never happen.")
+ raise RuntimeError(f"No LoRA weight found for module {module_name}.")
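To illustrate the substring lookup above, here is a standalone re-implementation (a sketch mirroring the patched helper, not the library code itself):

```python
def get_module_weight(weight_for_adapter, module_name):
    # A plain number applies to every module of the model.
    if not isinstance(weight_for_adapter, dict):
        return weight_for_adapter
    # Otherwise return the scale of the first expanded layer name contained in the module name.
    for layer_name, weight_ in weight_for_adapter.items():
        if layer_name in module_name:
            return weight_
    raise RuntimeError(f"No LoRA weight found for module {module_name}.")

scales = {"mid_block.attentions.0": 3, "down_blocks.1.attentions.0": 2}
print(get_module_weight(scales, "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q"))  # 2
print(get_module_weight(0.5, "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k"))       # 0.5
```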
# iterate over each adapter, make it active and set the corresponding scaling weight
for adapter_name, weight in zip(adapter_names, weights):
From a469a4d1a2680d6a3b1858fe889e5a895977156d Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Sat, 23 Mar 2024 01:10:54 +0100
Subject: [PATCH 19/25] make style, quality, fix-copies
---
src/diffusers/loaders/unet_loader_utils.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/diffusers/loaders/unet_loader_utils.py b/src/diffusers/loaders/unet_loader_utils.py
index 13e65a65bc81..a659ead16e68 100644
--- a/src/diffusers/loaders/unet_loader_utils.py
+++ b/src/diffusers/loaders/unet_loader_utils.py
@@ -19,6 +19,7 @@
# import here to avoid circular imports
from ..models import UNet2DConditionModel
+
def translate_into_actual_layer_name(name):
"""Translate user-friendly name (e.g. 'mid') into actual layer name (e.g. 'mid_block.attentions.0')"""
if name == "mid":
@@ -33,7 +34,7 @@ def translate_into_actual_layer_name(name):
return ".".join((updown, block, attn))
-def maybe_expand_lora_scales(unet: 'UNet2DConditionModel', weight_scales: List[Union[float, Dict]]):
+def maybe_expand_lora_scales(unet: "UNet2DConditionModel", weight_scales: List[Union[float, Dict]]):
blocks_with_transformer = {
"down": [i for i, block in enumerate(unet.down_blocks) if hasattr(block, "attentions")],
"up": [i for i, block in enumerate(unet.up_blocks) if hasattr(block, "attentions")],
From 957358bd0c4013da4251bd3b8bd76b7db096f843 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Sat, 23 Mar 2024 01:19:56 +0100
Subject: [PATCH 20/25] Fixed typo in tutorial
---
docs/source/en/tutorials/using_peft_for_inference.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/source/en/tutorials/using_peft_for_inference.md b/docs/source/en/tutorials/using_peft_for_inference.md
index 84d16abfa899..a280601a5e31 100644
--- a/docs/source/en/tutorials/using_peft_for_inference.md
+++ b/docs/source/en/tutorials/using_peft_for_inference.md
@@ -182,7 +182,7 @@ adapter_weight_scales_pixel = {
}
}
}
-pipe.set_adapters(["toy", "pixel"], [adapter_weight_scales_toy, adapter_weight_scales_pixel]])
+pipe.set_adapters(["toy", "pixel"], [adapter_weight_scales_toy, adapter_weight_scales_pixel])
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
image
```
From cb062b638dcc8110ceaae568817504d82eab41d5 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Sat, 23 Mar 2024 13:34:27 +0100
Subject: [PATCH 21/25] Moved some warnings into `lora_loader_utils.py`
---
src/diffusers/loaders/lora.py | 46 ++++++++-----------
...t_loader_utils.py => lora_loader_utils.py} | 25 ++++++++++
src/diffusers/loaders/unet.py | 2 +-
3 files changed, 44 insertions(+), 29 deletions(-)
rename src/diffusers/loaders/{unet_loader_utils.py => lora_loader_utils.py} (82%)
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index 797ba20c0f74..73b3c095ec88 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -45,6 +45,7 @@
set_weights_and_activate_adapters,
)
from .lora_conversion_utils import _convert_kohya_lora_to_diffusers, _maybe_map_sgm_blocks_to_diffusers
+from .lora_loader_utils import warn_if_adapter_and_scales_mismatch
if is_transformers_available():
@@ -1054,53 +1055,42 @@ def set_adapters(
f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(adapter_weights)}"
)
- def warn_if_adapter_misses_part(adapter_name, part_name):
- adapter_list = self.get_list_adapters() # e.g. {"unet": ["name1"], "text_encoder": ["name1", "name2"]}
- adapter_parts = [
- part for part, adapters in adapter_list.items() for adapter in adapters if adapter == adapter_name
- ]
- if part_name not in adapter_parts:
- logger.warning(
- f"Lora weight dict for adapter '{adapter_name}' contains {part_name}, but this will be ignored because {adapter_name} does not contain weights for {part_name}. Valid parts for {adapter_name} are: {adapter_parts}."
- )
-
# Decompose weights into weights for unet, text_encoder and text_encoder_2
unet_lora_weights, text_encoder_lora_weights, text_encoder_2_lora_weights = [], [], []
for adapter_name, weights in zip(adapter_names, adapter_weights):
if isinstance(weights, dict):
- unet_weight = weights.pop("unet", None)
- text_encoder_weight = weights.pop("text_encoder", None)
- text_encoder_2_weight = weights.pop("text_encoder_2", None)
+ unet_lora_weight = weights.pop("unet", None)
+ text_encoder_lora_weight = weights.pop("text_encoder", None)
+ text_encoder_2_lora_weight = weights.pop("text_encoder_2", None)
if len(weights) > 0:
raise ValueError(
f"Got invalid key '{weights.keys()}' in lora weight dict for adapter {adapter_name}."
)
- if text_encoder_2_weight is not None and not hasattr(self, "text_encoder_2"):
+ if text_encoder_2_lora_weight is not None and not hasattr(self, "text_encoder_2"):
logger.warning(
"Lora weight dict contains text_encoder_2 weights but will be ignored because pipeline does not have text_encoder_2."
)
# warn if adapter doesn't have parts specified by adapter_weights
- if unet_weight is not None:
- warn_if_adapter_misses_part(adapter_name, "unet")
-
- if text_encoder_weight is not None:
- warn_if_adapter_misses_part(adapter_name, "text_encoder")
-
- if text_encoder_2_weight is not None:
- warn_if_adapter_misses_part(adapter_name, "text_encoder_2")
+ warn_if_adapter_and_scales_mismatch(
+ adapter_name,
+ self.get_list_adapters(),
+ unet_lora_weight,
+ text_encoder_lora_weight,
+ text_encoder_2_lora_weight,
+ )
else:
- unet_weight = weights
- text_encoder_weight = weights
- text_encoder_2_weight = weights
+ unet_lora_weight = weights
+ text_encoder_lora_weight = weights
+ text_encoder_2_lora_weight = weights
- unet_lora_weights.append(unet_weight)
- text_encoder_lora_weights.append(text_encoder_weight)
- text_encoder_2_lora_weights.append(text_encoder_2_weight)
+ unet_lora_weights.append(unet_lora_weight)
+ text_encoder_lora_weights.append(text_encoder_lora_weight)
+ text_encoder_2_lora_weights.append(text_encoder_2_lora_weight)
unet = getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet
# Handle the UNET
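The decomposition above can be sketched in isolation as follows (adapter names and scale values are illustrative): a plain float is broadcast to all three parts, while a dict is split per part with `None` marking "not specified".

```python
adapter_names = ["adapter-1", "adapter-2"]
adapter_weights = [{"text_encoder": 0.5, "unet": {"down": 0.9}}, 0.7]

unet_lora_weights, text_encoder_lora_weights, text_encoder_2_lora_weights = [], [], []
for name, weights in zip(adapter_names, adapter_weights):
    if isinstance(weights, dict):
        unet_lora_weights.append(weights.get("unet"))
        text_encoder_lora_weights.append(weights.get("text_encoder"))
        text_encoder_2_lora_weights.append(weights.get("text_encoder_2"))
    else:
        unet_lora_weights.append(weights)
        text_encoder_lora_weights.append(weights)
        text_encoder_2_lora_weights.append(weights)

print(unet_lora_weights)            # [{'down': 0.9}, 0.7]
print(text_encoder_lora_weights)    # [0.5, 0.7]
print(text_encoder_2_lora_weights)  # [None, 0.7]
```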
diff --git a/src/diffusers/loaders/unet_loader_utils.py b/src/diffusers/loaders/lora_loader_utils.py
similarity index 82%
rename from src/diffusers/loaders/unet_loader_utils.py
rename to src/diffusers/loaders/lora_loader_utils.py
index a659ead16e68..9c66ce496a57 100644
--- a/src/diffusers/loaders/unet_loader_utils.py
+++ b/src/diffusers/loaders/lora_loader_utils.py
@@ -14,11 +14,15 @@
import copy
from typing import TYPE_CHECKING, Dict, List, Union
+from ..utils import logging
+
if TYPE_CHECKING:
# import here to avoid circular imports
from ..models import UNet2DConditionModel
+logger = logging.get_logger(__name__) # pylint: disable=invalid-name
+
def translate_into_actual_layer_name(name):
"""Translate user-friendly name (e.g. 'mid') into actual layer name (e.g. 'mid_block.attentions.0')"""
@@ -148,3 +152,24 @@ def maybe_expand_lora_scales_for_one_adapter(
)
return {translate_into_actual_layer_name(name): weight for name, weight in scales.items()}
+
+
+def warn_if_adapter_and_scales_mismatch(
+ adapter_name, adapter_list, unet_lora_weight, text_encoder_lora_weight, text_encoder_2_lora_weight
+):
+ """Warn if the scales dict for an adapter contains parts (unet, text_encoder, text_encoder_2) for which the adapter doesn't have weights."""
+ adapter_parts = [
+ part for part, adapters in adapter_list.items() for adapter in adapters if adapter == adapter_name
+ ]
+
+ for part_name, part_weight_scales in zip(
+ ["unet", "text_encoder", "text_encoder_2"],
+ [unet_lora_weight, text_encoder_lora_weight, text_encoder_2_lora_weight],
+ ):
+ if part_weight_scales is None:
+ continue
+
+ if part_name not in adapter_parts:
+ logger.warning(
+ f"Lora weight dict for adapter '{adapter_name}' contains {part_name}, but this will be ignored because {adapter_name} does not contain weights for {part_name}. Valid parts for {adapter_name} are: {adapter_parts}."
+ )
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index c0c1f6856400..78f3f852f31c 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -42,12 +42,12 @@
set_adapter_layers,
set_weights_and_activate_adapters,
)
+from .lora_loader_utils import maybe_expand_lora_scales
from .single_file_utils import (
convert_stable_cascade_unet_single_file_to_diffusers,
infer_stable_cascade_single_file_config,
load_single_file_model_checkpoint,
)
-from .unet_loader_utils import maybe_expand_lora_scales
from .utils import AttnProcsLayers
From a4a38df9eb242e191ac22f70526d8a53122f9462 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Sun, 24 Mar 2024 02:25:58 +0100
Subject: [PATCH 22/25] Moved scale/lora mismatch warnings back
---
src/diffusers/loaders/lora.py | 25 +++++++++++++------
src/diffusers/loaders/unet.py | 2 +-
...a_loader_utils.py => unet_loader_utils.py} | 21 ----------------
3 files changed, 18 insertions(+), 30 deletions(-)
rename src/diffusers/loaders/{lora_loader_utils.py => unet_loader_utils.py} (84%)
diff --git a/src/diffusers/loaders/lora.py b/src/diffusers/loaders/lora.py
index 73b3c095ec88..661fc1a4992f 100644
--- a/src/diffusers/loaders/lora.py
+++ b/src/diffusers/loaders/lora.py
@@ -45,7 +45,6 @@
set_weights_and_activate_adapters,
)
from .lora_conversion_utils import _convert_kohya_lora_to_diffusers, _maybe_map_sgm_blocks_to_diffusers
-from .lora_loader_utils import warn_if_adapter_and_scales_mismatch
if is_transformers_available():
@@ -1058,6 +1057,15 @@ def set_adapters(
# Decompose weights into weights for unet, text_encoder and text_encoder_2
unet_lora_weights, text_encoder_lora_weights, text_encoder_2_lora_weights = [], [], []
+ list_adapters = self.get_list_adapters() # eg {"unet": ["adapter1", "adapter2"], "text_encoder": ["adapter2"]}
+ all_adapters = {
+ adapter for adapters in list_adapters.values() for adapter in adapters
+ } # eg ["adapter1", "adapter2"]
+ invert_list_adapters = {
+ adapter: [part for part, adapters in list_adapters.items() if adapter in adapters]
+ for adapter in all_adapters
+ } # eg {"adapter1": ["unet"], "adapter2": ["unet", "text_encoder"]}
+
for adapter_name, weights in zip(adapter_names, adapter_weights):
if isinstance(weights, dict):
unet_lora_weight = weights.pop("unet", None)
@@ -1075,13 +1083,14 @@ def set_adapters(
)
# warn if adapter doesn't have parts specified by adapter_weights
- warn_if_adapter_and_scales_mismatch(
- adapter_name,
- self.get_list_adapters(),
- unet_lora_weight,
- text_encoder_lora_weight,
- text_encoder_2_lora_weight,
- )
+ for part_weight, part_name in zip(
+ [unet_lora_weight, text_encoder_lora_weight, text_encoder_2_lora_weight],
+ ["uent", "text_encoder", "text_encoder_2"],
+ ):
+ if part_weight is not None and part_name not in invert_list_adapters[adapter_name]:
+ logger.warning(
+ f"Lora weight dict for adapter '{adapter_name}' contains {part_name}, but this will be ignored because {adapter_name} does not contain weights for {part_name}. Valid parts for {adapter_name} are: {invert_list_adapters[adapter_name]}."
+ )
else:
unet_lora_weight = weights
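
In short, `set_adapters` now accepts, per adapter, either a plain float or a dict that splits the scale across `unet`, `text_encoder` and `text_encoder_2`, and warns about parts the adapter does not actually provide. A rough usage sketch follows, with model repos and adapter names borrowed from the PEFT inference tutorial and the scale values purely illustrative.

```python
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")

pipe.set_adapters(
    ["toy", "pixel"],
    [
        0.8,  # plain float: same scale for unet, text_encoder and text_encoder_2
        {"unet": {"down": 0.9, "mid": 0.6, "up": 0.9}, "text_encoder": 0.5},  # dict: per-part scales
    ],
)
# If "pixel" carried no text_encoder LoRA weights, the warning loop above would log
# that the "text_encoder" entry is ignored instead of raising an error.
```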
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index 78f3f852f31c..c0c1f6856400 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -42,12 +42,12 @@
set_adapter_layers,
set_weights_and_activate_adapters,
)
-from .lora_loader_utils import maybe_expand_lora_scales
from .single_file_utils import (
convert_stable_cascade_unet_single_file_to_diffusers,
infer_stable_cascade_single_file_config,
load_single_file_model_checkpoint,
)
+from .unet_loader_utils import maybe_expand_lora_scales
from .utils import AttnProcsLayers
diff --git a/src/diffusers/loaders/lora_loader_utils.py b/src/diffusers/loaders/unet_loader_utils.py
similarity index 84%
rename from src/diffusers/loaders/lora_loader_utils.py
rename to src/diffusers/loaders/unet_loader_utils.py
index 9c66ce496a57..075562a9c360 100644
--- a/src/diffusers/loaders/lora_loader_utils.py
+++ b/src/diffusers/loaders/unet_loader_utils.py
@@ -152,24 +152,3 @@ def maybe_expand_lora_scales_for_one_adapter(
)
return {translate_into_actual_layer_name(name): weight for name, weight in scales.items()}
-
-
-def warn_if_adapter_and_scales_mismatch(
- adapter_name, adapter_list, unet_lora_weight, text_encoder_lora_weight, text_encoder_2_lora_weight
-):
- """Warn if the scales dict for an adapter contain parts (unet, text_encoder, text_encoder_2) for which the adapter doesn't have weights."""
- adapter_parts = [
- part for part, adapters in adapter_list.items() for adapter in adapters if adapter == adapter_name
- ]
-
- for part_name, part_weight_scales in zip(
- ["unet", "text_encoder", "text_encoder_2"],
- [unet_lora_weight, text_encoder_lora_weight, text_encoder_2_lora_weight],
- ):
- if part_weight_scales is None:
- continue
-
- if part_name not in adapter_parts:
- logger.warning(
- f"Lora weight dict for adapter '{adapter_name}' contains {part_name}, but this will be ignored because {adapter_name} does not contain weights for {part_name}. Valid parts for {adapter_name} are: {adapter_parts}."
- )
From 14fabf0a6dc11affa38877b1ec49f622346913bc Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Fri, 29 Mar 2024 12:52:45 +0100
Subject: [PATCH 23/25] Integrated final review suggestions
---
docs/source/en/tutorials/using_peft_for_inference.md | 2 +-
docs/source/en/using-diffusers/loading_adapters.md | 12 ++----------
src/diffusers/loaders/unet.py | 4 ++--
src/diffusers/loaders/unet_loader_utils.py | 12 ++++++------
4 files changed, 11 insertions(+), 19 deletions(-)
diff --git a/docs/source/en/tutorials/using_peft_for_inference.md b/docs/source/en/tutorials/using_peft_for_inference.md
index a280601a5e31..1bfb3f5c48b7 100644
--- a/docs/source/en/tutorials/using_peft_for_inference.md
+++ b/docs/source/en/tutorials/using_peft_for_inference.md
@@ -136,7 +136,7 @@ image
### Customize adapters strength
For even more customization, you can control how strongly the adapter affects each part of the pipeline. For this, pass a dictionary with the control strengths (called "scales") to [`~diffusers.loaders.UNet2DConditionLoadersMixin.set_adapters`].
-For example, here's how you can turn on the adapter for the `text_encoder` and `down` parts, but turn it off for the `mid` and `up` parts:
+For example, here's how you can turn on the adapter for the `down` parts, but turn it off for the `mid` and `up` parts:
```python
pipe.enable_lora() # enable lora again, after we disabled it above
prompt = "toy_face of a hacker with a hoodie, pixel art"
diff --git a/docs/source/en/using-diffusers/loading_adapters.md b/docs/source/en/using-diffusers/loading_adapters.md
index e2ef7e80fa43..b079d2165ece 100644
--- a/docs/source/en/using-diffusers/loading_adapters.md
+++ b/docs/source/en/using-diffusers/loading_adapters.md
@@ -182,19 +182,11 @@ scales = {
pipe.set_adapters("my_adapter", scales)
```
-This also works with multiple adapters:
-```python
-pipe = ... # create pipeline
-pipe.load_lora_weights(..., adapter_name="my_adapter_1")
-pipe.load_lora_weights(..., adapter_name="my_adapter_2")
-scales_1 = { ... }
-scales_2 = { ... }
-pipe.set_adapters(["my_adapter_1", "my_adapter_2"], [scales_1, scales_2])
-```
+This also works with multiple adapters - see [this guide](https://huggingface.co/docs/diffusers/tutorials/using_peft_for_inference#customize-adapters-strength) for how to do it.
-Currently, [`~loaders.LoraLoaderMixin.set_adapters`] only supports scaling attention weights. If a LoRa has other parts (e.g., resnets or down-/upsamplers), they will keep a scale of 1.0.
+Currently, [`~loaders.LoraLoaderMixin.set_adapters`] only supports scaling attention weights. If a LoRA has other parts (e.g., resnets or down-/upsamplers), they will keep a scale of 1.0.
diff --git a/src/diffusers/loaders/unet.py b/src/diffusers/loaders/unet.py
index c0c1f6856400..8bbec26189b0 100644
--- a/src/diffusers/loaders/unet.py
+++ b/src/diffusers/loaders/unet.py
@@ -47,7 +47,7 @@
infer_stable_cascade_single_file_config,
load_single_file_model_checkpoint,
)
-from .unet_loader_utils import maybe_expand_lora_scales
+from .unet_loader_utils import _maybe_expand_lora_scales
from .utils import AttnProcsLayers
@@ -613,7 +613,7 @@ def set_adapters(
weights = [w if w is not None else 1.0 for w in weights]
# e.g. [{...}, 7] -> [{expanded dict...}, 7]
- weights = maybe_expand_lora_scales(self, weights)
+ weights = _maybe_expand_lora_scales(self, weights)
set_weights_and_activate_adapters(self, adapter_names, weights)
diff --git a/src/diffusers/loaders/unet_loader_utils.py b/src/diffusers/loaders/unet_loader_utils.py
index 075562a9c360..918a0fca06c8 100644
--- a/src/diffusers/loaders/unet_loader_utils.py
+++ b/src/diffusers/loaders/unet_loader_utils.py
@@ -24,7 +24,7 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
-def translate_into_actual_layer_name(name):
+def _translate_into_actual_layer_name(name):
"""Translate user-friendly name (e.g. 'mid') into actual layer name (e.g. 'mid_block.attentions.0')"""
if name == "mid":
return "mid_block.attentions.0"
@@ -38,7 +38,7 @@ def translate_into_actual_layer_name(name):
return ".".join((updown, block, attn))
-def maybe_expand_lora_scales(unet: "UNet2DConditionModel", weight_scales: List[Union[float, Dict]]):
+def _maybe_expand_lora_scales(unet: "UNet2DConditionModel", weight_scales: List[Union[float, Dict]]):
blocks_with_transformer = {
"down": [i for i, block in enumerate(unet.down_blocks) if hasattr(block, "attentions")],
"up": [i for i, block in enumerate(unet.up_blocks) if hasattr(block, "attentions")],
@@ -46,7 +46,7 @@ def maybe_expand_lora_scales(unet: "UNet2DConditionModel", weight_scales: List[U
transformer_per_block = {"down": unet.config.layers_per_block, "up": unet.config.layers_per_block + 1}
expanded_weight_scales = [
- maybe_expand_lora_scales_for_one_adapter(
+ _maybe_expand_lora_scales_for_one_adapter(
weight_for_adapter, blocks_with_transformer, transformer_per_block, unet.state_dict()
)
for weight_for_adapter in weight_scales
@@ -55,7 +55,7 @@ def maybe_expand_lora_scales(unet: "UNet2DConditionModel", weight_scales: List[U
return expanded_weight_scales
-def maybe_expand_lora_scales_for_one_adapter(
+def _maybe_expand_lora_scales_for_one_adapter(
scales: Union[float, Dict],
blocks_with_transformer: Dict[str, int],
transformer_per_block: Dict[str, int],
@@ -146,9 +146,9 @@ def maybe_expand_lora_scales_for_one_adapter(
del scales[updown]
for layer in scales.keys():
- if not any(translate_into_actual_layer_name(layer) in module for module in state_dict.keys()):
+ if not any(_translate_into_actual_layer_name(layer) in module for module in state_dict.keys()):
raise ValueError(
f"Can't set lora scale for layer {layer}. It either doesn't exist in this unet or it has no attentions."
)
- return {translate_into_actual_layer_name(name): weight for name, weight in scales.items()}
+ return {_translate_into_actual_layer_name(name): weight for name, weight in scales.items()}
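
For context, a hedged sketch of what the now-private expansion helpers produce. It assumes a diffusers build that contains this patch, uses a default randomly initialized `UNet2DConditionModel` only to avoid downloading weights, and the exact expanded keys depend on that default config.

```python
from diffusers import UNet2DConditionModel
from diffusers.loaders.unet_loader_utils import _maybe_expand_lora_scales

# A small randomly initialized UNet is enough to exercise the expansion logic.
unet = UNet2DConditionModel()

user_scales = {
    "down": 0.8,                         # broadcast to every down-block transformer
    "mid": 0.5,                          # single mid-block attention
    "up": {"block_1": [0.2, 0.3, 0.4]},  # per-transformer scales inside one up block
}
expanded = _maybe_expand_lora_scales(unet, [user_scales])

# Keys are now actual layer names, e.g. "mid_block.attentions.0" or
# "up_blocks.1.attentions.2", ready for set_weights_and_activate_adapters.
print(expanded[0])
```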
From 850016103cb19be5e2445c2bb8b99ada10c3099c Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Fri, 29 Mar 2024 15:25:34 +0100
Subject: [PATCH 24/25] Empty commit to trigger CI
---
examples/research_projects/controlnetxs/README_sdxl.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/research_projects/controlnetxs/README_sdxl.md b/examples/research_projects/controlnetxs/README_sdxl.md
index d401c1e76698..45b21834f78f 100644
--- a/examples/research_projects/controlnetxs/README_sdxl.md
+++ b/examples/research_projects/controlnetxs/README_sdxl.md
@@ -10,6 +10,6 @@ Here's the overview from the [project page](https://vislearn.github.io/ControlNe
*With increasing computing capabilities, current model architectures appear to follow the trend of simply upscaling all components without validating the necessity for doing so. In this project we investigate the size and architectural design of ControlNet [Zhang et al., 2023] for controlling the image generation process with stable diffusion-based models. We show that a new architecture with as little as 1% of the parameters of the base model achieves state-of-the art results, considerably better than ControlNet in terms of FID score. Hence we call it ControlNet-XS. We provide the code for controlling StableDiffusion-XL [Podell et al., 2023] (Model B, 48M Parameters) and StableDiffusion 2.1 [Rombach et al. 2022] (Model B, 14M Parameters), all under openrail license.*
-This model was contributed by [UmerHA](https://twitter.com/UmerHAdil). ❤️
+This model was contributed by [UmerHA](https://twitter.com/UmerHAdil). ❤️❤️
> 🧠 Make sure to check out the Schedulers [guide](https://huggingface.co/docs/diffusers/main/en/using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](https://huggingface.co/docs/diffusers/main/en/using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.
\ No newline at end of file
From 74ce9bb4302dad1be8095134312d02aff2f17cd2 Mon Sep 17 00:00:00 2001
From: UmerHA <40663591+UmerHA@users.noreply.github.com>
Date: Fri, 29 Mar 2024 15:25:55 +0100
Subject: [PATCH 25/25] Reverted empty commit to trigger CI
---
examples/research_projects/controlnetxs/README_sdxl.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/research_projects/controlnetxs/README_sdxl.md b/examples/research_projects/controlnetxs/README_sdxl.md
index 45b21834f78f..d401c1e76698 100644
--- a/examples/research_projects/controlnetxs/README_sdxl.md
+++ b/examples/research_projects/controlnetxs/README_sdxl.md
@@ -10,6 +10,6 @@ Here's the overview from the [project page](https://vislearn.github.io/ControlNe
*With increasing computing capabilities, current model architectures appear to follow the trend of simply upscaling all components without validating the necessity for doing so. In this project we investigate the size and architectural design of ControlNet [Zhang et al., 2023] for controlling the image generation process with stable diffusion-based models. We show that a new architecture with as little as 1% of the parameters of the base model achieves state-of-the art results, considerably better than ControlNet in terms of FID score. Hence we call it ControlNet-XS. We provide the code for controlling StableDiffusion-XL [Podell et al., 2023] (Model B, 48M Parameters) and StableDiffusion 2.1 [Rombach et al. 2022] (Model B, 14M Parameters), all under openrail license.*
-This model was contributed by [UmerHA](https://twitter.com/UmerHAdil). ❤️❤️
+This model was contributed by [UmerHA](https://twitter.com/UmerHAdil). ❤️
> 🧠 Make sure to check out the Schedulers [guide](https://huggingface.co/docs/diffusers/main/en/using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](https://huggingface.co/docs/diffusers/main/en/using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines.
\ No newline at end of file