From 507d043766186009fedeafa1f61f6ac97f91cfb8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 08:52:49 +0530 Subject: [PATCH 01/44] start deprecating loraattn. --- tests/lora/test_lora_layers_old_backend.py | 857 +++++++-------------- 1 file changed, 299 insertions(+), 558 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 3d3b858fa0fd..2b99655c74c0 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -22,7 +22,6 @@ import numpy as np import torch import torch.nn as nn -import torch.nn.functional as F from huggingface_hub.repocard import RepoCard from PIL import Image from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer @@ -41,17 +40,14 @@ UNet2DConditionModel, UNet3DConditionModel, ) -from diffusers.loaders import AttnProcsLayers, LoraLoaderMixin +from diffusers.loaders import LoraLoaderMixin, StableDiffusionXLLoraLoaderMixin from diffusers.models.attention_processor import ( Attention, AttnProcessor, AttnProcessor2_0, - LoRAAttnProcessor, - LoRAAttnProcessor2_0, - LoRAXFormersAttnProcessor, XFormersAttnProcessor, ) -from diffusers.models.lora import PatchedLoraProjection, text_encoder_attn_modules +from diffusers.models.lora import LoRALinearLayer from diffusers.utils.import_utils import is_xformers_available from diffusers.utils.testing_utils import ( deprecate_after_peft_backend, @@ -64,109 +60,147 @@ ) -def create_lora_layers(model, mock_weights: bool = True): - lora_attn_procs = {} - for name in model.attn_processors.keys(): - cross_attention_dim = None if name.endswith("attn1.processor") else model.config.cross_attention_dim - if name.startswith("mid_block"): - hidden_size = model.config.block_out_channels[-1] - elif name.startswith("up_blocks"): - block_id = int(name[len("up_blocks.")]) - hidden_size = list(reversed(model.config.block_out_channels))[block_id] - elif name.startswith("down_blocks"): - block_id = int(name[len("down_blocks.")]) - hidden_size = model.config.block_out_channels[block_id] +# def create_lora_layers(model, mock_weights: bool = True): +# lora_attn_procs = {} +# for name in model.attn_processors.keys(): +# cross_attention_dim = None if name.endswith("attn1.processor") else model.config.cross_attention_dim +# if name.startswith("mid_block"): +# hidden_size = model.config.block_out_channels[-1] +# elif name.startswith("up_blocks"): +# block_id = int(name[len("up_blocks.")]) +# hidden_size = list(reversed(model.config.block_out_channels))[block_id] +# elif name.startswith("down_blocks"): +# block_id = int(name[len("down_blocks.")]) +# hidden_size = model.config.block_out_channels[block_id] - lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim) - lora_attn_procs[name] = lora_attn_procs[name].to(model.device) - - if mock_weights: - # add 1 to weights to mock trained weights - with torch.no_grad(): - lora_attn_procs[name].to_q_lora.up.weight += 1 - lora_attn_procs[name].to_k_lora.up.weight += 1 - lora_attn_procs[name].to_v_lora.up.weight += 1 - lora_attn_procs[name].to_out_lora.up.weight += 1 - - return lora_attn_procs - - -def create_unet_lora_layers(unet: nn.Module): - lora_attn_procs = {} - for name in unet.attn_processors.keys(): - cross_attention_dim = None if name.endswith("attn1.processor") else unet.config.cross_attention_dim - if name.startswith("mid_block"): - hidden_size = unet.config.block_out_channels[-1] - elif 
name.startswith("up_blocks"): - block_id = int(name[len("up_blocks.")]) - hidden_size = list(reversed(unet.config.block_out_channels))[block_id] - elif name.startswith("down_blocks"): - block_id = int(name[len("down_blocks.")]) - hidden_size = unet.config.block_out_channels[block_id] - lora_attn_processor_class = ( - LoRAAttnProcessor2_0 if hasattr(F, "scaled_dot_product_attention") else LoRAAttnProcessor - ) - lora_attn_procs[name] = lora_attn_processor_class( - hidden_size=hidden_size, cross_attention_dim=cross_attention_dim - ) - unet_lora_layers = AttnProcsLayers(lora_attn_procs) - return lora_attn_procs, unet_lora_layers +# lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim) +# lora_attn_procs[name] = lora_attn_procs[name].to(model.device) +# if mock_weights: +# # add 1 to weights to mock trained weights +# with torch.no_grad(): +# lora_attn_procs[name].to_q_lora.up.weight += 1 +# lora_attn_procs[name].to_k_lora.up.weight += 1 +# lora_attn_procs[name].to_v_lora.up.weight += 1 +# lora_attn_procs[name].to_out_lora.up.weight += 1 -def create_text_encoder_lora_attn_procs(text_encoder: nn.Module): - text_lora_attn_procs = {} - lora_attn_processor_class = ( - LoRAAttnProcessor2_0 if hasattr(F, "scaled_dot_product_attention") else LoRAAttnProcessor - ) - for name, module in text_encoder_attn_modules(text_encoder): - if isinstance(module.out_proj, nn.Linear): - out_features = module.out_proj.out_features - elif isinstance(module.out_proj, PatchedLoraProjection): - out_features = module.out_proj.regular_linear_layer.out_features - else: - assert False, module.out_proj.__class__ - - text_lora_attn_procs[name] = lora_attn_processor_class(hidden_size=out_features, cross_attention_dim=None) - return text_lora_attn_procs +# return lora_attn_procs -def create_text_encoder_lora_layers(text_encoder: nn.Module): - text_lora_attn_procs = create_text_encoder_lora_attn_procs(text_encoder) - text_encoder_lora_layers = AttnProcsLayers(text_lora_attn_procs) - return text_encoder_lora_layers +def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=True): + unet_lora_parameters = [] + in_features = None - -def create_lora_3d_layers(model, mock_weights: bool = True): - lora_attn_procs = {} - for name in model.attn_processors.keys(): - has_cross_attention = name.endswith("attn2.processor") and not ( - name.startswith("transformer_in") or "temp_attentions" in name.split(".") - ) - cross_attention_dim = model.config.cross_attention_dim if has_cross_attention else None - if name.startswith("mid_block"): - hidden_size = model.config.block_out_channels[-1] - elif name.startswith("up_blocks"): - block_id = int(name[len("up_blocks.")]) - hidden_size = list(reversed(model.config.block_out_channels))[block_id] - elif name.startswith("down_blocks"): - block_id = int(name[len("down_blocks.")]) - hidden_size = model.config.block_out_channels[block_id] - elif name.startswith("transformer_in"): + for attn_processor_name, attn_processor in unet.attn_processors.items(): + if is_3d and attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 - hidden_size = 8 * model.config.attention_head_dim - - lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim) - lora_attn_procs[name] = lora_attn_procs[name].to(model.device) + in_features = 8 * 
unet.config.attention_head_dim + + # Parse the attention module. + attn_module = unet + for n in attn_processor_name.split(".")[:-1]: + attn_module = getattr(attn_module, n) + + # Set the `lora_layer` attribute of the attention-related matrices. + attn_module.to_q.set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_q.in_features if in_features is None else in_features, + out_features=attn_module.to_q.out_features, + rank=rank, + ) + ) + attn_module.to_k.set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_k.in_features if in_features is None else in_features, + out_features=attn_module.to_k.out_features, + rank=rank, + ) + ) + attn_module.to_v.set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_v.in_features if in_features is None else in_features, + out_features=attn_module.to_v.out_features, + rank=rank, + ) + ) + attn_module.to_out[0].set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_out[0].in_features if in_features is None else in_features, + out_features=attn_module.to_out[0].out_features, + rank=rank, + ) + ) if mock_weights: - # add 1 to weights to mock trained weights with torch.no_grad(): - lora_attn_procs[name].to_q_lora.up.weight += 1 - lora_attn_procs[name].to_k_lora.up.weight += 1 - lora_attn_procs[name].to_v_lora.up.weight += 1 - lora_attn_procs[name].to_out_lora.up.weight += 1 - - return lora_attn_procs + attn_module.to_q.lora_layer.up.weight += 1 + attn_module.to_k.lora_layer.up.weight += 1 + attn_module.to_v.lora_layer.up.weight += 1 + attn_module.to_out[0].lora_layer.up.weight += 1 + + # Accumulate the LoRA params to optimize. + unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) + unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) + unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) + unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) + + return unet_lora_parameters + + +# def create_text_encoder_lora_attn_procs(text_encoder: nn.Module): +# text_lora_attn_procs = {} +# lora_attn_processor_class = ( +# LoRAAttnProcessor2_0 if hasattr(F, "scaled_dot_product_attention") else LoRAAttnProcessor +# ) +# for name, module in text_encoder_attn_modules(text_encoder): +# if isinstance(module.out_proj, nn.Linear): +# out_features = module.out_proj.out_features +# elif isinstance(module.out_proj, PatchedLoraProjection): +# out_features = module.out_proj.regular_linear_layer.out_features +# else: +# assert False, module.out_proj.__class__ + +# text_lora_attn_procs[name] = lora_attn_processor_class(hidden_size=out_features, cross_attention_dim=None) +# return text_lora_attn_procs + + +# def create_text_encoder_lora_layers(text_encoder: nn.Module): +# text_lora_attn_procs = create_text_encoder_lora_attn_procs(text_encoder) +# text_encoder_lora_layers = AttnProcsLayers(text_lora_attn_procs) +# return text_encoder_lora_layers + + +# def create_lora_3d_layers(model, mock_weights: bool = True): +# lora_attn_procs = {} +# for name in model.attn_processors.keys(): +# has_cross_attention = name.endswith("attn2.processor") and not ( +# name.startswith("transformer_in") or "temp_attentions" in name.split(".") +# ) +# cross_attention_dim = model.config.cross_attention_dim if has_cross_attention else None +# if name.startswith("mid_block"): +# hidden_size = model.config.block_out_channels[-1] +# elif name.startswith("up_blocks"): +# block_id = int(name[len("up_blocks.")]) +# hidden_size = list(reversed(model.config.block_out_channels))[block_id] +# elif 
name.startswith("down_blocks"): +# block_id = int(name[len("down_blocks.")]) +# hidden_size = model.config.block_out_channels[block_id] +# elif name.startswith("transformer_in"): +# # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 +# hidden_size = 8 * model.config.attention_head_dim + +# lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim) +# lora_attn_procs[name] = lora_attn_procs[name].to(model.device) + +# if mock_weights: +# # add 1 to weights to mock trained weights +# with torch.no_grad(): +# lora_attn_procs[name].to_q_lora.up.weight += 1 +# lora_attn_procs[name].to_k_lora.up.weight += 1 +# lora_attn_procs[name].to_v_lora.up.weight += 1 +# lora_attn_procs[name].to_out_lora.up.weight += 1 + +# return lora_attn_procs def set_lora_weights(lora_attn_parameters, randn_weight=False, var=1.0): @@ -192,6 +226,8 @@ def state_dicts_almost_equal(sd1, sd2): @deprecate_after_peft_backend class LoraLoaderMixinTests(unittest.TestCase): + lora_rank = 4 + def get_dummy_components(self): torch.manual_seed(0) unet = UNet2DConditionModel( @@ -235,8 +271,10 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(text_encoder_config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - unet_lora_attn_procs, unet_lora_layers = create_unet_lora_layers(unet) - text_encoder_lora_layers = create_text_encoder_lora_layers(text_encoder) + unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) + text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( + text_encoder, dtype=torch.float32, rank=self.lora_rank + ) pipeline_components = { "unet": unet, @@ -248,11 +286,7 @@ def get_dummy_components(self): "feature_extractor": None, "image_encoder": None, } - lora_components = { - "unet_lora_layers": unet_lora_layers, - "text_encoder_lora_layers": text_encoder_lora_layers, - "unet_lora_attn_procs": unet_lora_attn_procs, - } + lora_components = {"unet_lora_params": unet_lora_params, "text_encoder_lora_params": text_encoder_lora_params} return pipeline_components, lora_components def get_dummy_inputs(self, with_generator=True): @@ -290,8 +324,8 @@ def create_lora_weight_file(self, tmpdirname): _, lora_components = self.get_dummy_components() LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -311,68 +345,8 @@ def test_stable_diffusion_xformers_attn_processors(self): image = sd_pipe(**inputs).images assert image.shape == (1, 64, 64, 3) - @unittest.skipIf(not torch.cuda.is_available(), reason="xformers requires cuda") - def test_stable_diffusion_attn_processors(self): - # disable_full_determinism() - device = "cuda" # ensure determinism for the device-dependent torch.Generator - components, _ = self.get_dummy_components() - sd_pipe = StableDiffusionPipeline(**components) - sd_pipe = sd_pipe.to(device) - sd_pipe.set_progress_bar_config(disable=None) - - _, _, inputs = self.get_dummy_inputs() - - # run normal sd pipe - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - # run attention slicing - 
sd_pipe.enable_attention_slicing() - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - # run vae attention slicing - sd_pipe.enable_vae_slicing() - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - # run lora attention - attn_processors, _ = create_unet_lora_layers(sd_pipe.unet) - attn_processors = {k: v.to("cuda") for k, v in attn_processors.items()} - sd_pipe.unet.set_attn_processor(attn_processors) - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - @unittest.skipIf(not torch.cuda.is_available() or not is_xformers_available(), reason="xformers requires cuda") - def test_stable_diffusion_set_xformers_attn_processors(self): - # disable_full_determinism() - device = "cuda" # ensure determinism for the device-dependent torch.Generator - components, _ = self.get_dummy_components() - sd_pipe = StableDiffusionPipeline(**components) - sd_pipe = sd_pipe.to(device) - sd_pipe.set_progress_bar_config(disable=None) - - _, _, inputs = self.get_dummy_inputs() - - # run normal sd pipe - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - # run lora xformers attention - attn_processors, _ = create_unet_lora_layers(sd_pipe.unet) - attn_processors = { - k: LoRAXFormersAttnProcessor(hidden_size=v.hidden_size, cross_attention_dim=v.cross_attention_dim) - for k, v in attn_processors.items() - } - attn_processors = {k: v.to("cuda") for k, v in attn_processors.items()} - sd_pipe.unet.set_attn_processor(attn_processors) - image = sd_pipe(**inputs).images - assert image.shape == (1, 64, 64, 3) - - # enable_full_determinism() - def test_stable_diffusion_lora(self): - components, _ = self.get_dummy_components() + components, lora_components = self.get_dummy_components() sd_pipe = StableDiffusionPipeline(**components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) @@ -385,9 +359,14 @@ def test_stable_diffusion_lora(self): image_slice = image[0, -3:, -3:, -1] # set lora layers - lora_attn_procs = create_lora_layers(sd_pipe.unet) - sd_pipe.unet.set_attn_processor(lora_attn_procs) - sd_pipe = sd_pipe.to(torch_device) + with tempfile.TemporaryDirectory() as tmpdirname: + LoraLoaderMixin.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + sd_pipe.load_lora_weights(tmpdirname) # forward 2 _, _, inputs = self.get_dummy_inputs() @@ -420,8 +399,8 @@ def test_lora_save_load(self): with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(tmpdirname) @@ -434,7 +413,6 @@ def test_lora_save_load(self): def test_lora_save_load_no_safe_serialization(self): pipeline_components, lora_components = self.get_dummy_components() - unet_lora_attn_procs = lora_components["unet_lora_attn_procs"] sd_pipe = StableDiffusionPipeline(**pipeline_components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) @@ -445,9 +423,13 @@ def 
test_lora_save_load_no_safe_serialization(self): orig_image_slice = original_images[0, -3:, -3:, -1] with tempfile.TemporaryDirectory() as tmpdirname: - unet = sd_pipe.unet - unet.set_attn_processor(unet_lora_attn_procs) - unet.save_attn_procs(tmpdirname, safe_serialization=False) + LoraLoaderMixin.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + safe_serialization=False, + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) sd_pipe.load_lora_weights(tmpdirname) @@ -469,8 +451,15 @@ def test_text_encoder_lora_monkey_patch(self): # monkey patch params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) - set_lora_weights(params, randn_weight=False) + with tempfile.TemporaryDirectory() as tmpdirname: + LoraLoaderMixin.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=None, + text_encoder_lora_layers=params, + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + pipe.load_lora_weights(tmpdirname) # inference with lora outputs_with_lora = pipe.text_encoder(**dummy_tokens)[0] @@ -480,13 +469,20 @@ def test_text_encoder_lora_monkey_patch(self): outputs_without_lora, outputs_with_lora ), "lora_up_weight are all zero, so the lora outputs should be the same to without lora outputs" - # create lora_attn_procs with randn up.weights - create_text_encoder_lora_attn_procs(pipe.text_encoder) - # monkey patch - params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) + pipeline_components, _ = self.get_dummy_components() + pipe = StableDiffusionPipeline(**pipeline_components) + params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) set_lora_weights(params, randn_weight=True) + with tempfile.TemporaryDirectory() as tmpdirname: + LoraLoaderMixin.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=None, + text_encoder_lora_layers=params, + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + pipe.load_lora_weights(tmpdirname) # inference with lora outputs_with_lora = pipe.text_encoder(**dummy_tokens)[0] @@ -508,8 +504,15 @@ def test_text_encoder_lora_remove_monkey_patch(self): # monkey patch params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) - set_lora_weights(params, randn_weight=True) + with tempfile.TemporaryDirectory() as tmpdirname: + LoraLoaderMixin.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=None, + text_encoder_lora_layers=params, + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + pipe.load_lora_weights(tmpdirname) # inference with lora outputs_with_lora = pipe.text_encoder(**dummy_tokens)[0] @@ -541,8 +544,8 @@ def test_text_encoder_lora_scale(self): with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(tmpdirname) @@ -592,13 +595,13 @@ def test_unload_lora_sd(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) set_lora_weights(lora_components["text_encoder_lora_layers"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], text_encoder_lora_layers=lora_components["text_encoder_lora_layers"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -677,7 +680,7 @@ def test_lora_save_load_with_xformers(self): with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], text_encoder_lora_layers=lora_components["text_encoder_lora_layers"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -692,6 +695,8 @@ def test_lora_save_load_with_xformers(self): @deprecate_after_peft_backend class SDXInpaintLoraMixinTests(unittest.TestCase): + lora_rank = 4 + def get_dummy_inputs(self, device, seed=0, img_res=64, output_pil=True): # TODO: use tensor inputs instead of PIL, this is here just to leave the old expected_slices untouched if output_pil: @@ -765,6 +770,11 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(text_encoder_config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) + text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( + text_encoder, dtype=torch.float32, rank=self.lora_rank + ) + components = { "unet": unet, "scheduler": scheduler, @@ -775,12 +785,13 @@ def get_dummy_components(self): "feature_extractor": None, "image_encoder": None, } - return components + lora_components = {"unet_lora_params": unet_lora_params, "text_encoder_lora_params": text_encoder_lora_params} + return components, lora_components def test_stable_diffusion_inpaint_lora(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator - components = self.get_dummy_components() + components, lora_components = self.get_dummy_components() sd_pipe = StableDiffusionInpaintPipeline(**components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) @@ -792,9 +803,14 @@ def test_stable_diffusion_inpaint_lora(self): image_slice = image[0, -3:, -3:, -1] # set lora layers - lora_attn_procs = create_lora_layers(sd_pipe.unet) - sd_pipe.unet.set_attn_processor(lora_attn_procs) - sd_pipe = sd_pipe.to(torch_device) + with tempfile.TemporaryDirectory() as tmpdirname: + LoraLoaderMixin.save_lora_weights( + save_directory=tmpdirname, + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + ) + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + sd_pipe.load_lora_weights(tmpdirname) # forward 2 inputs = self.get_dummy_inputs(device) @@ -814,6 +830,8 @@ def test_stable_diffusion_inpaint_lora(self): @deprecate_after_peft_backend class SDXLLoraLoaderMixinTests(unittest.TestCase): + lora_rank = 4 + def get_dummy_components(self): torch.manual_seed(0) unet = UNet2DConditionModel( @@ -871,9 +889,13 @@ def get_dummy_components(self): 
text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config) tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - unet_lora_attn_procs, unet_lora_layers = create_unet_lora_layers(unet) - text_encoder_one_lora_layers = create_text_encoder_lora_layers(text_encoder) - text_encoder_two_lora_layers = create_text_encoder_lora_layers(text_encoder_2) + unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) + text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( + text_encoder, dtype=torch.float32, rank=self.lora_rank + ) + text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin( + text_encoder_2, dtype=torch.float32, rank=self.lora_rank + ) pipeline_components = { "unet": unet, @@ -887,10 +909,9 @@ def get_dummy_components(self): "feature_extractor": None, } lora_components = { - "unet_lora_layers": unet_lora_layers, - "text_encoder_one_lora_layers": text_encoder_one_lora_layers, - "text_encoder_two_lora_layers": text_encoder_two_lora_layers, - "unet_lora_attn_procs": unet_lora_attn_procs, + "unet_lora_params": unet_lora_params, + "text_encoder_lora_params": text_encoder_lora_params, + "text_encoder_two_lora_params": text_encoder_two_lora_params, } return pipeline_components, lora_components @@ -929,9 +950,9 @@ def test_lora_save_load(self): with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(tmpdirname) @@ -951,16 +972,16 @@ def test_unload_lora_sdxl(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(tmpdirname) @@ -992,9 +1013,9 @@ def test_load_lora_locally(self): with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=False, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) @@ -1015,9 +1036,9 @@ def test_text_encoder_lora_state_dict_unchanged(self): with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=False, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) @@ -1050,9 +1071,9 @@ def test_load_lora_locally_safetensors(self): with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1069,16 +1090,16 @@ def test_lora_fuse_nan(self): _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1113,16 +1134,16 @@ def test_lora_fusion(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. - set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1146,16 +1167,16 @@ def test_unfuse_lora(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1191,16 +1212,16 @@ def test_lora_fusion_is_not_affected_by_unloading(self): _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images # Emulate training. - set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1230,16 +1251,16 @@ def test_fuse_lora_with_different_scales(self): _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1255,9 +1276,9 @@ def test_fuse_lora_with_different_scales(self): with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1282,16 +1303,16 @@ def test_with_different_scales(self): original_imagee_slice = original_images[0, -3:, -3:, -1] # Emulate training. - set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1330,16 +1351,16 @@ def test_with_different_scales_fusion_equivalence(self): images_slice = images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True, var=0.1) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True, var=0.1) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True, var=0.1) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True, var=0.1) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1377,16 +1398,16 @@ def test_save_load_fused_lora_modules(self): _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) # Emulate training. - set_lora_weights(lora_components["unet_lora_layers"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_one_lora_layers"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_two_lora_layers"].parameters(), randn_weight=True, var=0.1) + set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True, var=0.1) + set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True, var=0.1) + set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True, var=0.1) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_layers"], - text_encoder_lora_layers=lora_components["text_encoder_one_lora_layers"], - text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_layers"], + unet_lora_layers=lora_components["unet_lora_params"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_2_lora_layers=lora_components["text_encoder_two_lora_params"], safe_serialization=True, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) @@ -1460,10 +1481,10 @@ def test_lora_processors(self): with torch.no_grad(): sample1 = model(**inputs_dict).sample - lora_attn_procs = create_lora_layers(model) + lora_params = create_unet_lora_layers(model) # make sure we can set a list of attention processors - model.set_attn_processor(lora_attn_procs) + model.load_attn_procs(lora_params) model.to(torch_device) # test that attn processors can be set to itself @@ -1480,120 +1501,6 @@ def test_lora_processors(self): # sample 2 and sample 3 should be different assert (sample2 - sample3).abs().max() > 1e-4 - def test_lora_save_load(self): - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = (8, 16) - - torch.manual_seed(0) - model = 
self.model_class(**init_dict) - model.to(torch_device) - - with torch.no_grad(): - old_sample = model(**inputs_dict).sample - - lora_attn_procs = create_lora_layers(model) - model.set_attn_processor(lora_attn_procs) - - with torch.no_grad(): - sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=False) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - new_model.load_attn_procs(tmpdirname) - - with torch.no_grad(): - new_sample = new_model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - assert (sample - new_sample).abs().max() < 5e-4 - - # LoRA and no LoRA should NOT be the same - assert (sample - old_sample).abs().max() > 5e-4 - - def test_lora_save_load_safetensors(self): - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = (8, 16) - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - with torch.no_grad(): - old_sample = model(**inputs_dict).sample - - lora_attn_procs = create_lora_layers(model) - model.set_attn_processor(lora_attn_procs) - - with torch.no_grad(): - sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=True) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - new_model.load_attn_procs(tmpdirname) - - with torch.no_grad(): - new_sample = new_model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - assert (sample - new_sample).abs().max() < 1e-4 - - # LoRA and no LoRA should NOT be the same - assert (sample - old_sample).abs().max() > 1e-4 - - def test_lora_save_safetensors_load_torch(self): - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = (8, 16) - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - lora_attn_procs = create_lora_layers(model, mock_weights=False) - model.set_attn_processor(lora_attn_procs) - # Saving as torch, properly reloads with directly filename - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=True) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - new_model.load_attn_procs(tmpdirname, weight_name="pytorch_lora_weights.safetensors") - - def test_lora_save_torch_force_load_safetensors_error(self): - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = (8, 16) - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - lora_attn_procs = create_lora_layers(model, mock_weights=False) - model.set_attn_processor(lora_attn_procs) - # Saving as torch, properly reloads with directly filename - with tempfile.TemporaryDirectory() as 
tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=False) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - with self.assertRaises(IOError) as e: - new_model.load_attn_procs(tmpdirname, use_safetensors=True) - self.assertIn("Error no file named pytorch_lora_weights.safetensors", str(e.exception)) - def test_lora_on_off(self, expected_max_diff=1e-3): # enable deterministic behavior for gradient checkpointing init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() @@ -1607,8 +1514,8 @@ def test_lora_on_off(self, expected_max_diff=1e-3): with torch.no_grad(): old_sample = model(**inputs_dict).sample - lora_attn_procs = create_lora_layers(model) - model.set_attn_processor(lora_attn_procs) + lora_params = create_unet_lora_layers(model) + model.load_attn_procs(lora_params) with torch.no_grad(): sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.0}).sample @@ -1637,8 +1544,8 @@ def test_lora_xformers_on_off(self, expected_max_diff=6e-4): torch.manual_seed(0) model = self.model_class(**init_dict) model.to(torch_device) - lora_attn_procs = create_lora_layers(model) - model.set_attn_processor(lora_attn_procs) + lora_params = create_unet_lora_layers(model) + model.load_attn_procs(lora_params) # default with torch.no_grad(): @@ -1712,10 +1619,10 @@ def test_lora_processors(self): with torch.no_grad(): sample1 = model(**inputs_dict).sample - lora_attn_procs = create_lora_3d_layers(model) + unet_lora_params = create_unet_lora_layers(model, is_3d=True) # make sure we can set a list of attention processors - model.set_attn_processor(lora_attn_procs) + model.load_attn_procs(unet_lora_params) model.to(torch_device) # test that attn processors can be set to itself @@ -1732,172 +1639,6 @@ def test_lora_processors(self): # sample 2 and sample 3 should be different assert (sample2 - sample3).abs().max() > 3e-3 - def test_lora_save_load(self): - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = 8 - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - with torch.no_grad(): - old_sample = model(**inputs_dict).sample - - lora_attn_procs = create_lora_3d_layers(model) - model.set_attn_processor(lora_attn_procs) - - with torch.no_grad(): - sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=False) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - new_model.load_attn_procs(tmpdirname) - - with torch.no_grad(): - new_sample = new_model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - assert (sample - new_sample).abs().max() < 5e-3 - - # LoRA and no LoRA should NOT be the same - assert (sample - old_sample).abs().max() > 1e-4 - - def test_lora_save_load_safetensors(self): - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = 8 - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - with torch.no_grad(): - old_sample = model(**inputs_dict).sample - - lora_attn_procs = create_lora_3d_layers(model) - model.set_attn_processor(lora_attn_procs) - - with torch.no_grad(): - 
sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=True) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - new_model.load_attn_procs(tmpdirname) - - with torch.no_grad(): - new_sample = new_model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample - - assert (sample - new_sample).abs().max() < 3e-3 - - # LoRA and no LoRA should NOT be the same - assert (sample - old_sample).abs().max() > 1e-4 - - def test_lora_save_safetensors_load_torch(self): - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = 8 - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - lora_attn_procs = create_lora_3d_layers(model, mock_weights=False) - model.set_attn_processor(lora_attn_procs) - # Saving as torch, properly reloads with directly filename - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - new_model.load_attn_procs(tmpdirname, weight_name="pytorch_lora_weights.safetensors") - - def test_lora_save_torch_force_load_safetensors_error(self): - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = 8 - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - lora_attn_procs = create_lora_3d_layers(model, mock_weights=False) - model.set_attn_processor(lora_attn_procs) - # Saving as torch, properly reloads with directly filename - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_attn_procs(tmpdirname, safe_serialization=False) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))) - torch.manual_seed(0) - new_model = self.model_class(**init_dict) - new_model.to(torch_device) - with self.assertRaises(IOError) as e: - new_model.load_attn_procs(tmpdirname, use_safetensors=True) - self.assertIn("Error no file named pytorch_lora_weights.safetensors", str(e.exception)) - - def test_lora_on_off(self): - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - init_dict["attention_head_dim"] = 8 - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - - with torch.no_grad(): - old_sample = model(**inputs_dict).sample - - lora_attn_procs = create_lora_3d_layers(model) - model.set_attn_processor(lora_attn_procs) - - with torch.no_grad(): - sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.0}).sample - - model.set_default_attn_processor() - - with torch.no_grad(): - new_sample = model(**inputs_dict).sample - - assert (sample - new_sample).abs().max() < 1e-4 - assert (sample - old_sample).abs().max() < 3e-3 - - @unittest.skipIf( - torch_device != "cuda" or not is_xformers_available(), - reason="XFormers attention is only available with CUDA and `xformers` installed", - ) - def test_lora_xformers_on_off(self): - # enable deterministic behavior for gradient checkpointing - init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() - - 
init_dict["attention_head_dim"] = 4 - - torch.manual_seed(0) - model = self.model_class(**init_dict) - model.to(torch_device) - lora_attn_procs = create_lora_3d_layers(model) - model.set_attn_processor(lora_attn_procs) - - # default - with torch.no_grad(): - sample = model(**inputs_dict).sample - - model.enable_xformers_memory_efficient_attention() - on_sample = model(**inputs_dict).sample - - model.disable_xformers_memory_efficient_attention() - off_sample = model(**inputs_dict).sample - - assert (sample - on_sample).abs().max() < 1e-4 - assert (sample - off_sample).abs().max() < 1e-4 - @slow @deprecate_after_peft_backend From a5e0951f8621c666f487cb2356dec59fa6088cf5 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 08:53:44 +0530 Subject: [PATCH 02/44] fix --- tests/lora/test_lora_layers_old_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 2b99655c74c0..cbd274d504d0 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -893,7 +893,7 @@ def get_dummy_components(self): text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin( + text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder_2, dtype=torch.float32, rank=self.lora_rank ) From 2f273ea6051bfecd58f6737e2896511b54c96acb Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:06:18 +0530 Subject: [PATCH 03/44] wrap into unet_lora_state_dict --- tests/lora/test_lora_layers_old_backend.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index cbd274d504d0..7f6989f8ae99 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -48,6 +48,7 @@ XFormersAttnProcessor, ) from diffusers.models.lora import LoRALinearLayer +from diffusers.training_utils import unet_lora_state_dict from diffusers.utils.import_utils import is_xformers_available from diffusers.utils.testing_utils import ( deprecate_after_peft_backend, @@ -144,7 +145,7 @@ def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=T unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) - return unet_lora_parameters + return unet_lora_state_dict(unet) # def create_text_encoder_lora_attn_procs(text_encoder: nn.Module): From 015e4a1f57fbaa3beaf023cfead83397e7fddfcc Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:11:08 +0530 Subject: [PATCH 04/44] utilize text_encoder_lora_params --- tests/lora/test_lora_layers_old_backend.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 7f6989f8ae99..f545f13580bd 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -40,7 +40,7 @@ UNet2DConditionModel, UNet3DConditionModel, ) -from diffusers.loaders import LoraLoaderMixin, StableDiffusionXLLoraLoaderMixin +from diffusers.loaders import LoraLoaderMixin, StableDiffusionXLLoraLoaderMixin, text_encoder_lora_state_dict from diffusers.models.attention_processor import ( 
Attention, AttnProcessor, @@ -276,6 +276,7 @@ def get_dummy_components(self): text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) + text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder_lora_params) pipeline_components = { "unet": unet, @@ -775,6 +776,7 @@ def get_dummy_components(self): text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) + text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder_lora_params) components = { "unet": unet, @@ -894,9 +896,11 @@ def get_dummy_components(self): text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) + text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder_lora_params) text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder_2, dtype=torch.float32, rank=self.lora_rank ) + text_encoder_two_lora_params = text_encoder_lora_state_dict(text_encoder_two_lora_params) pipeline_components = { "unet": unet, From 08289e1b828b85cc024e127dff4aab8f0a11970a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:13:51 +0530 Subject: [PATCH 05/44] utilize text_encoder_attn_modules --- tests/lora/test_lora_layers_old_backend.py | 35 +++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index f545f13580bd..44abfd26554f 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -40,7 +40,7 @@ UNet2DConditionModel, UNet3DConditionModel, ) -from diffusers.loaders import LoraLoaderMixin, StableDiffusionXLLoraLoaderMixin, text_encoder_lora_state_dict +from diffusers.loaders import LoraLoaderMixin, StableDiffusionXLLoraLoaderMixin from diffusers.models.attention_processor import ( Attention, AttnProcessor, @@ -88,6 +88,39 @@ # return lora_attn_procs +def text_encoder_attn_modules(text_encoder): + attn_modules = [] + + if isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection)): + for i, layer in enumerate(text_encoder.text_model.encoder.layers): + name = f"text_model.encoder.layers.{i}.self_attn" + mod = layer.self_attn + attn_modules.append((name, mod)) + else: + raise ValueError(f"do not know how to get attention modules for: {text_encoder.__class__.__name__}") + + return attn_modules + + +def text_encoder_lora_state_dict(text_encoder): + state_dict = {} + + for name, module in text_encoder_attn_modules(text_encoder): + for k, v in module.q_proj.lora_linear_layer.state_dict().items(): + state_dict[f"{name}.q_proj.lora_linear_layer.{k}"] = v + + for k, v in module.k_proj.lora_linear_layer.state_dict().items(): + state_dict[f"{name}.k_proj.lora_linear_layer.{k}"] = v + + for k, v in module.v_proj.lora_linear_layer.state_dict().items(): + state_dict[f"{name}.v_proj.lora_linear_layer.{k}"] = v + + for k, v in module.out_proj.lora_linear_layer.state_dict().items(): + state_dict[f"{name}.out_proj.lora_linear_layer.{k}"] = v + + return state_dict + + def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=True): unet_lora_parameters = [] in_features = None From 604936f385f5bd05cc225c78b9172d85e054c42a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:15:10 +0530 Subject: [PATCH 06/44] debug --- tests/lora/test_lora_layers_old_backend.py | 1 + 1 
file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 44abfd26554f..203842556eb7 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -90,6 +90,7 @@ def text_encoder_attn_modules(text_encoder): attn_modules = [] + print(f"text encoder type: {type(text_encoder)}") if isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection)): for i, layer in enumerate(text_encoder.text_model.encoder.layers): From 91c58887a283452ab0221f0446e9ce2a5d680681 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:23:20 +0530 Subject: [PATCH 07/44] debug --- tests/lora/test_lora_layers_old_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 203842556eb7..ef5669f1446a 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -310,7 +310,7 @@ def get_dummy_components(self): text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder_lora_params) + text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder) pipeline_components = { "unet": unet, @@ -810,7 +810,7 @@ def get_dummy_components(self): text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder_lora_params) + text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder) components = { "unet": unet, @@ -930,11 +930,11 @@ def get_dummy_components(self): text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder_lora_params) + text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder) text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder_2, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_two_lora_params = text_encoder_lora_state_dict(text_encoder_two_lora_params) + text_encoder_two_lora_params = text_encoder_lora_state_dict(text_encoder_2) pipeline_components = { "unet": unet, From 3f10766804f7f6e403e5e835c38b800270e3a3e2 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:23:48 +0530 Subject: [PATCH 08/44] remove print --- tests/lora/test_lora_layers_old_backend.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index ef5669f1446a..73ee4a1a9661 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -90,7 +90,6 @@ def text_encoder_attn_modules(text_encoder): attn_modules = [] - print(f"text encoder type: {type(text_encoder)}") if isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection)): for i, layer in enumerate(text_encoder.text_model.encoder.layers): From 4ce6112c532a654f06fb1f3dfb72ad9a3580819e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:30:35 +0530 Subject: [PATCH 09/44] don't use text encoder for test_stable_diffusion_lora --- tests/lora/test_lora_layers_old_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 73ee4a1a9661..f1bcb2bc0180 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -398,7 +398,7 @@ def test_stable_diffusion_lora(self): LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, unet_lora_layers=lora_components["unet_lora_params"], - text_encoder_lora_layers=lora_components["text_encoder_lora_params"], + text_encoder_lora_layers=None, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(tmpdirname) From 7020cec27dde080f3e829272dccf8f3831a04b78 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:37:11 +0530 Subject: [PATCH 10/44] load the procs. --- tests/lora/test_lora_layers_old_backend.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index f1bcb2bc0180..19954c0ed19d 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -394,14 +394,15 @@ def test_stable_diffusion_lora(self): image_slice = image[0, -3:, -3:, -1] # set lora layers - with tempfile.TemporaryDirectory() as tmpdirname: - LoraLoaderMixin.save_lora_weights( - save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_params"], - text_encoder_lora_layers=None, - ) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - sd_pipe.load_lora_weights(tmpdirname) + sd_pipe.unet.load_attn_procs(lora_components["unet_lora_params"]) + # with tempfile.TemporaryDirectory() as tmpdirname: + # LoraLoaderMixin.save_lora_weights( + # save_directory=tmpdirname, + # unet_lora_layers=lora_components["unet_lora_params"], + # text_encoder_lora_layers=None, + # ) + # self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) + # sd_pipe.load_lora_weights(tmpdirname) # forward 2 _, _, inputs = self.get_dummy_inputs() From a72aba288435d195234672daf4cd71113585929f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:42:32 +0530 Subject: [PATCH 11/44] set_default_attn_processor --- tests/lora/test_lora_layers_old_backend.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 19954c0ed19d..f6fc57d5bb94 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -122,7 +122,6 @@ def text_encoder_lora_state_dict(text_encoder): def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=True): - unet_lora_parameters = [] in_features = None for attn_processor_name, attn_processor in unet.attn_processors.items(): @@ -172,12 +171,6 @@ def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=T attn_module.to_v.lora_layer.up.weight += 1 attn_module.to_out[0].lora_layer.up.weight += 1 - # Accumulate the LoRA params to optimize. 
- unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) - return unet_lora_state_dict(unet) @@ -385,6 +378,7 @@ def test_stable_diffusion_lora(self): sd_pipe = StableDiffusionPipeline(**components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) + sd_pipe.set_default_attn_processor() # forward 1 _, _, inputs = self.get_dummy_inputs() @@ -395,14 +389,6 @@ def test_stable_diffusion_lora(self): # set lora layers sd_pipe.unet.load_attn_procs(lora_components["unet_lora_params"]) - # with tempfile.TemporaryDirectory() as tmpdirname: - # LoraLoaderMixin.save_lora_weights( - # save_directory=tmpdirname, - # unet_lora_layers=lora_components["unet_lora_params"], - # text_encoder_lora_layers=None, - # ) - # self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - # sd_pipe.load_lora_weights(tmpdirname) # forward 2 _, _, inputs = self.get_dummy_inputs() From bdb2f6bac764583fc5c0b1f3c0c09e022f79036f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:44:53 +0530 Subject: [PATCH 12/44] fix: set_default_attn_processor call. --- tests/lora/test_lora_layers_old_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index f6fc57d5bb94..69108793ed8c 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -378,7 +378,7 @@ def test_stable_diffusion_lora(self): sd_pipe = StableDiffusionPipeline(**components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) - sd_pipe.set_default_attn_processor() + sd_pipe.unet.set_default_attn_processor() # forward 1 _, _, inputs = self.get_dummy_inputs() From 3a35ceb565515ea1d72733ce3bd979b030c2acf8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 09:46:48 +0530 Subject: [PATCH 13/44] fix: lora_components[unet_lora_params] --- tests/lora/test_lora_layers_old_backend.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 69108793ed8c..bb3e7ea597d4 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -818,6 +818,7 @@ def test_stable_diffusion_inpaint_lora(self): sd_pipe = StableDiffusionInpaintPipeline(**components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) + sd_pipe.unet.set_default_attn_processor() # forward 1 inputs = self.get_dummy_inputs(device) @@ -826,14 +827,7 @@ def test_stable_diffusion_inpaint_lora(self): image_slice = image[0, -3:, -3:, -1] # set lora layers - with tempfile.TemporaryDirectory() as tmpdirname: - LoraLoaderMixin.save_lora_weights( - save_directory=tmpdirname, - unet_lora_layers=lora_components["unet_lora_params"], - text_encoder_lora_layers=lora_components["text_encoder_lora_params"], - ) - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - sd_pipe.load_lora_weights(tmpdirname) + sd_pipe.unet.load_attn_procs(lora_components["unet_lora_params"]) # forward 2 inputs = self.get_dummy_inputs(device) From 18af8e791903684bca01a9c715f38bc946a84ca3 Mon Sep 17 00:00:00 2001 From: 
sayakpaul Date: Thu, 28 Dec 2023 09:59:33 +0530 Subject: [PATCH 14/44] checking for 3d. --- tests/lora/test_lora_layers_old_backend.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index bb3e7ea597d4..dd7e213687e5 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -123,11 +123,16 @@ def text_encoder_lora_state_dict(text_encoder): def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=True): in_features = None + out_features = None for attn_processor_name, attn_processor in unet.attn_processors.items(): if is_3d and attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 in_features = 8 * unet.config.attention_head_dim + has_cross_attention = attn_processor_name.endswith("attn2.processor") and not ( + attn_processor_name.startswith("transformer_in") or "temp_attentions" in attn_processor_name.split(".") + ) + out_features = unet.config.cross_attention_dim if has_cross_attention else None # Parse the attention module. attn_module = unet @@ -138,28 +143,28 @@ def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=T attn_module.to_q.set_lora_layer( LoRALinearLayer( in_features=attn_module.to_q.in_features if in_features is None else in_features, - out_features=attn_module.to_q.out_features, + out_features=attn_module.to_q.out_features if out_features is None else out_features, rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( in_features=attn_module.to_k.in_features if in_features is None else in_features, - out_features=attn_module.to_k.out_features, + out_features=attn_module.to_k.out_features if out_features is None else out_features, rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( in_features=attn_module.to_v.in_features if in_features is None else in_features, - out_features=attn_module.to_v.out_features, + out_features=attn_module.to_v.out_features if out_features is None else out_features, rank=rank, ) ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( in_features=attn_module.to_out[0].in_features if in_features is None else in_features, - out_features=attn_module.to_out[0].out_features, + out_features=attn_module.to_out[0].out_features if out_features is None else out_features, rank=rank, ) ) From 5b70ddbb2b18b63e060258ccf2579ae04eb3d8d6 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:05:13 +0530 Subject: [PATCH 15/44] 3d. --- tests/lora/test_lora_layers_old_backend.py | 91 +++++++++++++++++----- 1 file changed, 73 insertions(+), 18 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index dd7e213687e5..130784e5f790 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -121,18 +121,71 @@ def text_encoder_lora_state_dict(text_encoder): return state_dict -def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=True): - in_features = None - out_features = None +def create_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): + for attn_processor_name, attn_processor in unet.attn_processors.items(): + # Parse the attention module. 
+ attn_module = unet + for n in attn_processor_name.split(".")[:-1]: + attn_module = getattr(attn_module, n) + # Set the `lora_layer` attribute of the attention-related matrices. + attn_module.to_q.set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_q.in_features, + out_features=attn_module.to_q.out_features, + rank=rank, + ) + ) + attn_module.to_k.set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_k.in_features, + out_features=attn_module.to_k.out_features, + rank=rank, + ) + ) + attn_module.to_v.set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_v.in_features, + out_features=attn_module.to_v.out_features, + rank=rank, + ) + ) + attn_module.to_out[0].set_lora_layer( + LoRALinearLayer( + in_features=attn_module.to_out[0].in_features, + out_features=attn_module.to_out[0].out_features, + rank=rank, + ) + ) + + if mock_weights: + with torch.no_grad(): + attn_module.to_q.lora_layer.up.weight += 1 + attn_module.to_k.lora_layer.up.weight += 1 + attn_module.to_v.lora_layer.up.weight += 1 + attn_module.to_out[0].lora_layer.up.weight += 1 + + return unet_lora_state_dict(unet) + + +def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): for attn_processor_name, attn_processor in unet.attn_processors.items(): - if is_3d and attn_processor_name.startswith("transformer_in"): + has_cross_attention = attn_processor_name.endswith("attn2.processor") and not ( + attn_processor_name.startswith("transformer_in") or "temp_attentions" in attn_processor_name.split(".") + ) + cross_attention_dim = unet.config.cross_attention_dim if has_cross_attention else None + + if attn_processor_name.startswith("mid_block"): + hidden_size = unet.config.block_out_channels[-1] + elif attn_processor_name.startswith("up_blocks"): + block_id = int(attn_processor_name[len("up_blocks.")]) + hidden_size = list(reversed(unet.config.block_out_channels))[block_id] + elif attn_processor_name.startswith("down_blocks"): + block_id = int(attn_processor_name[len("down_blocks.")]) + hidden_size = unet.config.block_out_channels[block_id] + elif attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 - in_features = 8 * unet.config.attention_head_dim - has_cross_attention = attn_processor_name.endswith("attn2.processor") and not ( - attn_processor_name.startswith("transformer_in") or "temp_attentions" in attn_processor_name.split(".") - ) - out_features = unet.config.cross_attention_dim if has_cross_attention else None + hidden_size = 8 * unet.config.attention_head_dim # Parse the attention module. attn_module = unet @@ -142,29 +195,31 @@ def create_unet_lora_layers(unet: nn.Module, rank=4, is_3d=False, mock_weights=T # Set the `lora_layer` attribute of the attention-related matrices. 
attn_module.to_q.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_q.in_features if in_features is None else in_features, - out_features=attn_module.to_q.out_features if out_features is None else out_features, + in_features=hidden_size, + out_features=attn_module.to_q.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_k.in_features if in_features is None else in_features, - out_features=attn_module.to_k.out_features if out_features is None else out_features, + in_features=attn_module.to_k.in_features, + out_features=attn_module.to_k.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_v.in_features if in_features is None else in_features, - out_features=attn_module.to_v.out_features if out_features is None else out_features, + in_features=attn_module.to_v.in_features, + out_features=attn_module.to_v.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_out[0].in_features if in_features is None else in_features, - out_features=attn_module.to_out[0].out_features if out_features is None else out_features, + in_features=attn_module.to_out[0].in_features, + out_features=attn_module.to_out[0].out_features + if cross_attention_dim is None + else cross_attention_dim, rank=rank, ) ) @@ -1643,7 +1698,7 @@ def test_lora_processors(self): with torch.no_grad(): sample1 = model(**inputs_dict).sample - unet_lora_params = create_unet_lora_layers(model, is_3d=True) + unet_lora_params = create_3d_unet_lora_layers(model) # make sure we can set a list of attention processors model.load_attn_procs(unet_lora_params) From 5d006d4801ac8fea663f57b1734e467951b7d8b1 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:07:06 +0530 Subject: [PATCH 16/44] more fixes. 
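Apply the same change to the k/v/out projections so every LoRA layer built for the 3D UNet uses the resolved `hidden_size`. For orientation, the model tests consume this helper as follows (sketch only; `model` is the tiny `UNet3DConditionModel` built from `prepare_init_args_and_inputs_for_common`, and the exact feature sizes are still being adjusted in the commits that follow):

    # Attach LoRA layers to the 3D UNet and get back a plain state dict.
    unet_lora_params = create_3d_unet_lora_layers(model)
    # The old backend reloads that state dict through `load_attn_procs`.
    model.load_attn_procs(unet_lora_params)
    with torch.no_grad():
        sample = model(**inputs_dict, cross_attention_kwargs={"scale": 0.5}).sample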
--- tests/lora/test_lora_layers_old_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 130784e5f790..92b56235104e 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -202,22 +202,22 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_k.in_features, + in_features=hidden_size, out_features=attn_module.to_k.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_v.in_features, + in_features=hidden_size, out_features=attn_module.to_v.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_out[0].in_features, - out_features=attn_module.to_out[0].out_features + in_features=hidden_size, + out_features=attn_module.to_out[0].out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, From df47cc4af00e7324c236d526fbaf307aeb167205 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:11:49 +0530 Subject: [PATCH 17/44] debug --- tests/lora/test_lora_layers_old_backend.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 92b56235104e..2be049008571 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -169,7 +169,7 @@ def create_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): - for attn_processor_name, attn_processor in unet.attn_processors.items(): + for attn_processor_name in unet.attn_processors.keys(): has_cross_attention = attn_processor_name.endswith("attn2.processor") and not ( attn_processor_name.startswith("transformer_in") or "temp_attentions" in attn_processor_name.split(".") ) @@ -193,6 +193,10 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_module = getattr(attn_module, n) # Set the `lora_layer` attribute of the attention-related matrices. 
+ print(f"Hidden size: {hidden_size} in_features (q): {attn_module.to_q.in_features}") + print(f"Hidden size: {hidden_size} in_features (k): {attn_module.to_k.in_features}") + print(f"Hidden size: {hidden_size} in_features (v): {attn_module.to_v.in_features}") + print(f"Hidden size: {hidden_size} in_features (out): {attn_module.to_out[0].in_features}") attn_module.to_q.set_lora_layer( LoRALinearLayer( in_features=hidden_size, From 01bd812f89bfc5c07c310b4f6e8c2e87d76cac2e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:15:31 +0530 Subject: [PATCH 18/44] debug --- tests/lora/test_lora_layers_old_backend.py | 29 +++++++++++----------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 2be049008571..fe5fb74b1832 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -174,16 +174,17 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_processor_name.startswith("transformer_in") or "temp_attentions" in attn_processor_name.split(".") ) cross_attention_dim = unet.config.cross_attention_dim if has_cross_attention else None - - if attn_processor_name.startswith("mid_block"): - hidden_size = unet.config.block_out_channels[-1] - elif attn_processor_name.startswith("up_blocks"): - block_id = int(attn_processor_name[len("up_blocks.")]) - hidden_size = list(reversed(unet.config.block_out_channels))[block_id] - elif attn_processor_name.startswith("down_blocks"): - block_id = int(attn_processor_name[len("down_blocks.")]) - hidden_size = unet.config.block_out_channels[block_id] - elif attn_processor_name.startswith("transformer_in"): + hidden_size = None + + # if attn_processor_name.startswith("mid_block"): + # hidden_size = unet.config.block_out_channels[-1] + # elif attn_processor_name.startswith("up_blocks"): + # block_id = int(attn_processor_name[len("up_blocks.")]) + # hidden_size = list(reversed(unet.config.block_out_channels))[block_id] + # elif attn_processor_name.startswith("down_blocks"): + # block_id = int(attn_processor_name[len("down_blocks.")]) + # hidden_size = unet.config.block_out_channels[block_id] + if attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 hidden_size = 8 * unet.config.attention_head_dim @@ -199,28 +200,28 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): print(f"Hidden size: {hidden_size} in_features (out): {attn_module.to_out[0].in_features}") attn_module.to_q.set_lora_layer( LoRALinearLayer( - in_features=hidden_size, + in_features=attn_module.to_q.in_features if hidden_size is None else hidden_size, out_features=attn_module.to_q.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=hidden_size, + in_features=attn_module.to_k.in_features if hidden_size is None else hidden_size, out_features=attn_module.to_k.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=hidden_size, + in_features=attn_module.to_v.in_features if hidden_size is None else hidden_size, out_features=attn_module.to_v.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) 
attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - in_features=hidden_size, + in_features=attn_module.to_out[0].in_features if hidden_size is None else hidden_size, out_features=attn_module.to_out[0].out_features if cross_attention_dim is None else cross_attention_dim, From a4a46f01017785b057eaa864a279b80a75dee94f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:17:34 +0530 Subject: [PATCH 19/44] debug --- tests/lora/test_lora_layers_old_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index fe5fb74b1832..7707bfdea268 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -186,6 +186,7 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): # hidden_size = unet.config.block_out_channels[block_id] if attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 + print("Within transformer_in") hidden_size = 8 * unet.config.attention_head_dim # Parse the attention module. From 7d70f37ded680fb84ee802eafedf0d02edbb2cef Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:19:28 +0530 Subject: [PATCH 20/44] debug --- tests/lora/test_lora_layers_old_backend.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 7707bfdea268..e78eb5242737 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -174,7 +174,6 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_processor_name.startswith("transformer_in") or "temp_attentions" in attn_processor_name.split(".") ) cross_attention_dim = unet.config.cross_attention_dim if has_cross_attention else None - hidden_size = None # if attn_processor_name.startswith("mid_block"): # hidden_size = unet.config.block_out_channels[-1] @@ -201,29 +200,37 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): print(f"Hidden size: {hidden_size} in_features (out): {attn_module.to_out[0].in_features}") attn_module.to_q.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_q.in_features if hidden_size is None else hidden_size, + in_features=attn_module.to_q.in_features + if not attn_processor_name.startswith("transformer_in") + else hidden_size, out_features=attn_module.to_q.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_k.in_features if hidden_size is None else hidden_size, + in_features=attn_module.to_k.in_features + if not attn_processor_name.startswith("transformer_in") + else hidden_size, out_features=attn_module.to_k.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_v.in_features if hidden_size is None else hidden_size, + in_features=attn_module.to_v.in_features + if not attn_processor_name.startswith("transformer_in") + else hidden_size, out_features=attn_module.to_v.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - 
in_features=attn_module.to_out[0].in_features if hidden_size is None else hidden_size, - out_features=attn_module.to_out[0].out_features + in_features=attn_module.to_out[0].in_features + if not attn_processor_name.startswith("transformer_in") + else hidden_size, + out_features=attn_module.to_out[0].out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, From fe668cdf05b8bad29dbf646c35d5a458fea05028 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:23:32 +0530 Subject: [PATCH 21/44] more debug --- tests/lora/test_lora_layers_old_backend.py | 35 +++++++++------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index e78eb5242737..b39218580562 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -175,15 +175,15 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): ) cross_attention_dim = unet.config.cross_attention_dim if has_cross_attention else None - # if attn_processor_name.startswith("mid_block"): - # hidden_size = unet.config.block_out_channels[-1] - # elif attn_processor_name.startswith("up_blocks"): - # block_id = int(attn_processor_name[len("up_blocks.")]) - # hidden_size = list(reversed(unet.config.block_out_channels))[block_id] - # elif attn_processor_name.startswith("down_blocks"): - # block_id = int(attn_processor_name[len("down_blocks.")]) - # hidden_size = unet.config.block_out_channels[block_id] - if attn_processor_name.startswith("transformer_in"): + if attn_processor_name.startswith("mid_block"): + hidden_size = unet.config.block_out_channels[-1] + elif attn_processor_name.startswith("up_blocks"): + block_id = int(attn_processor_name[len("up_blocks.")]) + hidden_size = list(reversed(unet.config.block_out_channels))[block_id] + elif attn_processor_name.startswith("down_blocks"): + block_id = int(attn_processor_name[len("down_blocks.")]) + hidden_size = unet.config.block_out_channels[block_id] + elif attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 print("Within transformer_in") hidden_size = 8 * unet.config.attention_head_dim @@ -198,38 +198,31 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): print(f"Hidden size: {hidden_size} in_features (k): {attn_module.to_k.in_features}") print(f"Hidden size: {hidden_size} in_features (v): {attn_module.to_v.in_features}") print(f"Hidden size: {hidden_size} in_features (out): {attn_module.to_out[0].in_features}") + attn_module.to_q.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_q.in_features - if not attn_processor_name.startswith("transformer_in") - else hidden_size, + in_features=hidden_size, out_features=attn_module.to_q.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_k.in_features - if not attn_processor_name.startswith("transformer_in") - else hidden_size, + in_features=hidden_size, out_features=attn_module.to_k.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_v.in_features - if not attn_processor_name.startswith("transformer_in") - else hidden_size, + 
in_features=hidden_size, out_features=attn_module.to_v.out_features if cross_attention_dim is None else cross_attention_dim, rank=rank, ) ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - in_features=attn_module.to_out[0].in_features - if not attn_processor_name.startswith("transformer_in") - else hidden_size, + in_features=hidden_size, out_features=attn_module.to_out[0].out_features if cross_attention_dim is None else cross_attention_dim, From 2b097de27b7fa29ead3edb5f1849b0cdb444ce1f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:25:47 +0530 Subject: [PATCH 22/44] more debug --- tests/lora/test_lora_layers_old_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index b39218580562..285c412c3b4f 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -194,10 +194,10 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_module = getattr(attn_module, n) # Set the `lora_layer` attribute of the attention-related matrices. - print(f"Hidden size: {hidden_size} in_features (q): {attn_module.to_q.in_features}") - print(f"Hidden size: {hidden_size} in_features (k): {attn_module.to_k.in_features}") - print(f"Hidden size: {hidden_size} in_features (v): {attn_module.to_v.in_features}") - print(f"Hidden size: {hidden_size} in_features (out): {attn_module.to_out[0].in_features}") + print(f"Hidden size != attn_module.to_q.in_features: {hidden_size != attn_module.to_q.in_features}") + print(f"Hidden size != to_k.in_features: {hidden_size != attn_module.to_k.in_features}") + print(f"Hidden size != attn_module.to_v.in_features: {hidden_size != attn_module.to_v.in_features}") + print(f"Hidden size != attn_module.to_out[0].in_features: {hidden_size != attn_module.to_out[0].in_features}") attn_module.to_q.set_lora_layer( LoRALinearLayer( From 940a4a0aa55760049f300c519ac38cb25cd66e51 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:30:16 +0530 Subject: [PATCH 23/44] more debug --- src/diffusers/models/modeling_utils.py | 3 ++- tests/lora/test_lora_layers_old_backend.py | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 546c5b20f937..008d687090bf 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -148,7 +148,8 @@ def load_model_dict_into_meta( if param_name not in empty_state_dict: unexpected_keys.append(param_name) continue - + + print(f"load_model_dict_into_meta param_name {param_name}") if empty_state_dict[param_name].shape != param.shape: model_name_or_path_str = f"{model_name_or_path} " if model_name_or_path is not None else "" raise ValueError( diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 285c412c3b4f..2b92130d4663 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -194,10 +194,10 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_module = getattr(attn_module, n) # Set the `lora_layer` attribute of the attention-related matrices. 
- print(f"Hidden size != attn_module.to_q.in_features: {hidden_size != attn_module.to_q.in_features}") - print(f"Hidden size != to_k.in_features: {hidden_size != attn_module.to_k.in_features}") - print(f"Hidden size != attn_module.to_v.in_features: {hidden_size != attn_module.to_v.in_features}") - print(f"Hidden size != attn_module.to_out[0].in_features: {hidden_size != attn_module.to_out[0].in_features}") + # print(f"Hidden size != attn_module.to_q.in_features: {hidden_size != attn_module.to_q.in_features}") + # print(f"Hidden size != attn_module.to_k.in_features: {hidden_size != attn_module.to_k.in_features}") + # print(f"Hidden size != attn_module.to_v.in_features: {hidden_size != attn_module.to_v.in_features}") + # print(f"Hidden size != attn_module.to_out[0].in_features: {hidden_size != attn_module.to_out[0].in_features}") attn_module.to_q.set_lora_layer( LoRALinearLayer( From 3b38d0cc1f2ab983b950511b16e252bc72174841 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:30:53 +0530 Subject: [PATCH 24/44] more debug --- src/diffusers/models/modeling_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 008d687090bf..f0d802e7cd7e 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -149,8 +149,8 @@ def load_model_dict_into_meta( unexpected_keys.append(param_name) continue - print(f"load_model_dict_into_meta param_name {param_name}") if empty_state_dict[param_name].shape != param.shape: + print(f"load_model_dict_into_meta param_name {param_name}") model_name_or_path_str = f"{model_name_or_path} " if model_name_or_path is not None else "" raise ValueError( f"Cannot load {model_name_or_path_str}because {param_name} expected shape {empty_state_dict[param_name]}, but got {param.shape}. If you want to instead overwrite randomly initialized weights, please make sure to pass both `low_cpu_mem_usage=False` and `ignore_mismatched_sizes=True`. For more information, see also: https://github.com/huggingface/diffusers/issues/1619#issuecomment-1345604389 as an example." 
From 3c05c1078f13dc9defb761450c70cad21504e650 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:33:13 +0530 Subject: [PATCH 25/44] more debug --- tests/lora/test_lora_layers_old_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 2b92130d4663..92076594e182 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -202,21 +202,21 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_module.to_q.set_lora_layer( LoRALinearLayer( in_features=hidden_size, - out_features=attn_module.to_q.out_features if cross_attention_dim is None else cross_attention_dim, + out_features=attn_module.to_q.out_features if cross_attention_dim is None else max(attn_module.to_q.out_features, cross_attention_dim), rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( in_features=hidden_size, - out_features=attn_module.to_k.out_features if cross_attention_dim is None else cross_attention_dim, + out_features=attn_module.to_k.out_features if cross_attention_dim is None else max(attn_module.to_k.out_features, cross_attention_dim), rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( in_features=hidden_size, - out_features=attn_module.to_v.out_features if cross_attention_dim is None else cross_attention_dim, + out_features=attn_module.to_v.out_features if cross_attention_dim is None else max(attn_module.to_v.out_features, cross_attention_dim), rank=rank, ) ) @@ -225,7 +225,7 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): in_features=hidden_size, out_features=attn_module.to_out[0].out_features if cross_attention_dim is None - else cross_attention_dim, + else max(attn_module.to_out[0].out_features, cross_attention_dim), rank=rank, ) ) From 620df7dfe27084723a0cf2f5ad4e3a3314c54017 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:37:14 +0530 Subject: [PATCH 26/44] more debug --- src/diffusers/models/modeling_utils.py | 2 +- tests/lora/test_lora_layers_old_backend.py | 23 +++++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index f0d802e7cd7e..5714d027d484 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -148,7 +148,7 @@ def load_model_dict_into_meta( if param_name not in empty_state_dict: unexpected_keys.append(param_name) continue - + if empty_state_dict[param_name].shape != param.shape: print(f"load_model_dict_into_meta param_name {param_name}") model_name_or_path_str = f"{model_name_or_path} " if model_name_or_path is not None else "" diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 92076594e182..3f7be59781aa 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -185,7 +185,6 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): hidden_size = unet.config.block_out_channels[block_id] elif attn_processor_name.startswith("transformer_in"): # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 - print("Within transformer_in") hidden_size = 8 * unet.config.attention_head_dim # Parse the attention module. 
@@ -198,31 +197,37 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): # print(f"Hidden size != attn_module.to_k.in_features: {hidden_size != attn_module.to_k.in_features}") # print(f"Hidden size != attn_module.to_v.in_features: {hidden_size != attn_module.to_v.in_features}") # print(f"Hidden size != attn_module.to_out[0].in_features: {hidden_size != attn_module.to_out[0].in_features}") - + attn_module.to_q.set_lora_layer( LoRALinearLayer( - in_features=hidden_size, - out_features=attn_module.to_q.out_features if cross_attention_dim is None else max(attn_module.to_q.out_features, cross_attention_dim), + in_features=max(attn_module.to_q.in_features, hidden_size), + out_features=attn_module.to_q.out_features + if cross_attention_dim is None + else max(attn_module.to_q.out_features, cross_attention_dim), rank=rank, ) ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=hidden_size, - out_features=attn_module.to_k.out_features if cross_attention_dim is None else max(attn_module.to_k.out_features, cross_attention_dim), + in_features=max(attn_module.to_k.in_features, hidden_size), + out_features=attn_module.to_k.out_features + if cross_attention_dim is None + else max(attn_module.to_k.out_features, cross_attention_dim), rank=rank, ) ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=hidden_size, - out_features=attn_module.to_v.out_features if cross_attention_dim is None else max(attn_module.to_v.out_features, cross_attention_dim), + in_features=max(attn_module.to_v.in_features, hidden_size), + out_features=attn_module.to_v.out_features + if cross_attention_dim is None + else max(attn_module.to_v.out_features, cross_attention_dim), rank=rank, ) ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - in_features=hidden_size, + in_features=max(attn_module.to_out[0].in_features, hidden_size), out_features=attn_module.to_out[0].out_features if cross_attention_dim is None else max(attn_module.to_out[0].out_features, cross_attention_dim), From 598667c49865e8a2edab0a16ed3a2a0ab5311030 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:37:44 +0530 Subject: [PATCH 27/44] hack. 
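Clamp the LoRA feature sizes against what the wrapped projection actually exposes instead of overriding them outright; for the tiny 3D UNet used in these tests the derived `hidden_size` does not always match the projection's own `in_features`. Per projection this amounts to the following (excerpt mirroring the diff below, shown for `to_q` only):

    in_features = min(attn_module.to_q.in_features, hidden_size)
    out_features = (
        attn_module.to_q.out_features
        if cross_attention_dim is None
        else max(attn_module.to_q.out_features, cross_attention_dim)
    )

Treat this as a test-only workaround for the toy configuration, not as a general rule for LoRA shapes.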
--- tests/lora/test_lora_layers_old_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 3f7be59781aa..1b50f0ed30fa 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -200,7 +200,7 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_module.to_q.set_lora_layer( LoRALinearLayer( - in_features=max(attn_module.to_q.in_features, hidden_size), + in_features=min(attn_module.to_q.in_features, hidden_size), out_features=attn_module.to_q.out_features if cross_attention_dim is None else max(attn_module.to_q.out_features, cross_attention_dim), @@ -209,7 +209,7 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): ) attn_module.to_k.set_lora_layer( LoRALinearLayer( - in_features=max(attn_module.to_k.in_features, hidden_size), + in_features=min(attn_module.to_k.in_features, hidden_size), out_features=attn_module.to_k.out_features if cross_attention_dim is None else max(attn_module.to_k.out_features, cross_attention_dim), @@ -218,7 +218,7 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): ) attn_module.to_v.set_lora_layer( LoRALinearLayer( - in_features=max(attn_module.to_v.in_features, hidden_size), + in_features=min(attn_module.to_v.in_features, hidden_size), out_features=attn_module.to_v.out_features if cross_attention_dim is None else max(attn_module.to_v.out_features, cross_attention_dim), @@ -227,7 +227,7 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): ) attn_module.to_out[0].set_lora_layer( LoRALinearLayer( - in_features=max(attn_module.to_out[0].in_features, hidden_size), + in_features=min(attn_module.to_out[0].in_features, hidden_size), out_features=attn_module.to_out[0].out_features if cross_attention_dim is None else max(attn_module.to_out[0].out_features, cross_attention_dim), From 60c524240393d48c8d8069ef30cd7c455ef5b7f8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 10:38:51 +0530 Subject: [PATCH 28/44] remove comments and prep for a PR. --- src/diffusers/models/modeling_utils.py | 1 - tests/lora/test_lora_layers_old_backend.py | 89 ---------------------- 2 files changed, 90 deletions(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 5714d027d484..546c5b20f937 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -150,7 +150,6 @@ def load_model_dict_into_meta( continue if empty_state_dict[param_name].shape != param.shape: - print(f"load_model_dict_into_meta param_name {param_name}") model_name_or_path_str = f"{model_name_or_path} " if model_name_or_path is not None else "" raise ValueError( f"Cannot load {model_name_or_path_str}because {param_name} expected shape {empty_state_dict[param_name]}, but got {param.shape}. If you want to instead overwrite randomly initialized weights, please make sure to pass both `low_cpu_mem_usage=False` and `ignore_mismatched_sizes=True`. For more information, see also: https://github.com/huggingface/diffusers/issues/1619#issuecomment-1345604389 as an example." 
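With the commented-out helpers and debug prints gone, the text-encoder side of these tests reduces to the following flow (sketch; the helper names are the ones defined earlier in this file, and `rank=4` mirrors the default used by the UNet helpers):

    # Monkey-patch LoRA layers into the CLIP text encoder (old backend),
    # then collect them into a plain state dict for serialization.
    LoraLoaderMixin._modify_text_encoder(text_encoder, dtype=torch.float32, rank=4)
    text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder)

    # Pipelines later save and reload the UNet and text-encoder dicts together.
    LoraLoaderMixin.save_lora_weights(
        save_directory=tmpdirname,
        unet_lora_layers=unet_lora_params,
        text_encoder_lora_layers=text_encoder_lora_params,
    )
    sd_pipe.load_lora_weights(tmpdirname)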
diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 1b50f0ed30fa..492c938ecd51 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -61,33 +61,6 @@ ) -# def create_lora_layers(model, mock_weights: bool = True): -# lora_attn_procs = {} -# for name in model.attn_processors.keys(): -# cross_attention_dim = None if name.endswith("attn1.processor") else model.config.cross_attention_dim -# if name.startswith("mid_block"): -# hidden_size = model.config.block_out_channels[-1] -# elif name.startswith("up_blocks"): -# block_id = int(name[len("up_blocks.")]) -# hidden_size = list(reversed(model.config.block_out_channels))[block_id] -# elif name.startswith("down_blocks"): -# block_id = int(name[len("down_blocks.")]) -# hidden_size = model.config.block_out_channels[block_id] - -# lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim) -# lora_attn_procs[name] = lora_attn_procs[name].to(model.device) - -# if mock_weights: -# # add 1 to weights to mock trained weights -# with torch.no_grad(): -# lora_attn_procs[name].to_q_lora.up.weight += 1 -# lora_attn_procs[name].to_k_lora.up.weight += 1 -# lora_attn_procs[name].to_v_lora.up.weight += 1 -# lora_attn_procs[name].to_out_lora.up.weight += 1 - -# return lora_attn_procs - - def text_encoder_attn_modules(text_encoder): attn_modules = [] @@ -192,12 +165,6 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): for n in attn_processor_name.split(".")[:-1]: attn_module = getattr(attn_module, n) - # Set the `lora_layer` attribute of the attention-related matrices. - # print(f"Hidden size != attn_module.to_q.in_features: {hidden_size != attn_module.to_q.in_features}") - # print(f"Hidden size != attn_module.to_k.in_features: {hidden_size != attn_module.to_k.in_features}") - # print(f"Hidden size != attn_module.to_v.in_features: {hidden_size != attn_module.to_v.in_features}") - # print(f"Hidden size != attn_module.to_out[0].in_features: {hidden_size != attn_module.to_out[0].in_features}") - attn_module.to_q.set_lora_layer( LoRALinearLayer( in_features=min(attn_module.to_q.in_features, hidden_size), @@ -245,62 +212,6 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): return unet_lora_state_dict(unet) -# def create_text_encoder_lora_attn_procs(text_encoder: nn.Module): -# text_lora_attn_procs = {} -# lora_attn_processor_class = ( -# LoRAAttnProcessor2_0 if hasattr(F, "scaled_dot_product_attention") else LoRAAttnProcessor -# ) -# for name, module in text_encoder_attn_modules(text_encoder): -# if isinstance(module.out_proj, nn.Linear): -# out_features = module.out_proj.out_features -# elif isinstance(module.out_proj, PatchedLoraProjection): -# out_features = module.out_proj.regular_linear_layer.out_features -# else: -# assert False, module.out_proj.__class__ - -# text_lora_attn_procs[name] = lora_attn_processor_class(hidden_size=out_features, cross_attention_dim=None) -# return text_lora_attn_procs - - -# def create_text_encoder_lora_layers(text_encoder: nn.Module): -# text_lora_attn_procs = create_text_encoder_lora_attn_procs(text_encoder) -# text_encoder_lora_layers = AttnProcsLayers(text_lora_attn_procs) -# return text_encoder_lora_layers - - -# def create_lora_3d_layers(model, mock_weights: bool = True): -# lora_attn_procs = {} -# for name in model.attn_processors.keys(): -# has_cross_attention = name.endswith("attn2.processor") and not ( -# 
name.startswith("transformer_in") or "temp_attentions" in name.split(".") -# ) -# cross_attention_dim = model.config.cross_attention_dim if has_cross_attention else None -# if name.startswith("mid_block"): -# hidden_size = model.config.block_out_channels[-1] -# elif name.startswith("up_blocks"): -# block_id = int(name[len("up_blocks.")]) -# hidden_size = list(reversed(model.config.block_out_channels))[block_id] -# elif name.startswith("down_blocks"): -# block_id = int(name[len("down_blocks.")]) -# hidden_size = model.config.block_out_channels[block_id] -# elif name.startswith("transformer_in"): -# # Note that the `8 * ...` comes from: https://github.com/huggingface/diffusers/blob/7139f0e874f10b2463caa8cbd585762a309d12d6/src/diffusers/models/unet_3d_condition.py#L148 -# hidden_size = 8 * model.config.attention_head_dim - -# lora_attn_procs[name] = LoRAAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim) -# lora_attn_procs[name] = lora_attn_procs[name].to(model.device) - -# if mock_weights: -# # add 1 to weights to mock trained weights -# with torch.no_grad(): -# lora_attn_procs[name].to_q_lora.up.weight += 1 -# lora_attn_procs[name].to_k_lora.up.weight += 1 -# lora_attn_procs[name].to_v_lora.up.weight += 1 -# lora_attn_procs[name].to_out_lora.up.weight += 1 - -# return lora_attn_procs - - def set_lora_weights(lora_attn_parameters, randn_weight=False, var=1.0): with torch.no_grad(): for parameter in lora_attn_parameters: From 22296683fe7bcd4b56a4fd257a687d027597c8ba Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:05:30 +0530 Subject: [PATCH 29/44] appropriate set_lora_weights() --- tests/lora/test_lora_layers_old_backend.py | 135 ++++++++++++++------- 1 file changed, 91 insertions(+), 44 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 492c938ecd51..6c6d143382c1 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -95,6 +95,8 @@ def text_encoder_lora_state_dict(text_encoder): def create_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): + unet_lora_parameters = [] + for attn_processor_name, attn_processor in unet.attn_processors.items(): # Parse the attention module. 
attn_module = unet @@ -138,7 +140,12 @@ def create_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): attn_module.to_v.lora_layer.up.weight += 1 attn_module.to_out[0].lora_layer.up.weight += 1 - return unet_lora_state_dict(unet) + unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) + unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) + unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) + unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) + + return unet_lora_parameters, unet_lora_state_dict(unet) def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): @@ -213,12 +220,16 @@ def create_3d_unet_lora_layers(unet: nn.Module, rank=4, mock_weights=True): def set_lora_weights(lora_attn_parameters, randn_weight=False, var=1.0): - with torch.no_grad(): - for parameter in lora_attn_parameters: - if randn_weight: - parameter[:] = torch.randn_like(parameter) * var - else: - torch.zero_(parameter) + if not isinstance(lora_attn_parameters, dict): + with torch.no_grad(): + for parameter in lora_attn_parameters: + if randn_weight: + parameter[:] = torch.randn_like(parameter) * var + else: + torch.zero_(parameter) + else: + modified_state_dict = {k: torch.rand_like(v) * var for k, v in lora_attn_parameters.items()} + return modified_state_dict def state_dicts_almost_equal(sd1, sd2): @@ -280,7 +291,7 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(text_encoder_config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) + unet_lora_raw_params, unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) @@ -296,7 +307,11 @@ def get_dummy_components(self): "feature_extractor": None, "image_encoder": None, } - lora_components = {"unet_lora_params": unet_lora_params, "text_encoder_lora_params": text_encoder_lora_params} + lora_components = { + "unet_lora_raw_params": unet_lora_raw_params, + "unet_lora_params": unet_lora_params, + "text_encoder_lora_params": text_encoder_lora_params, + } return pipeline_components, lora_components def get_dummy_inputs(self, with_generator=True): @@ -599,8 +614,9 @@ def test_unload_lora_sd(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_layers"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_layers"] = set_lora_weights( + lora_components["text_encoder_lora_layers"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( @@ -774,7 +790,7 @@ def get_dummy_components(self): text_encoder = CLIPTextModel(text_encoder_config) tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) + unet_lora_raw_params, unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) @@ -790,7 +806,11 @@ def get_dummy_components(self): "feature_extractor": None, "image_encoder": None, } - lora_components = {"unet_lora_params": unet_lora_params, "text_encoder_lora_params": text_encoder_lora_params} + lora_components = { + "unet_lora_raw_params": unet_lora_raw_params, + "unet_lora_params": unet_lora_params, + "text_encoder_lora_params": text_encoder_lora_params, + } return components, lora_components def test_stable_diffusion_inpaint_lora(self): @@ -888,7 +908,7 @@ def get_dummy_components(self): text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config) tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) + _, unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) @@ -973,9 +993,12 @@ def test_unload_lora_sdxl(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1091,9 +1114,12 @@ def test_lora_fuse_nan(self): _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1135,9 +1161,12 @@ def test_lora_fusion(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1168,9 +1197,12 @@ def test_unfuse_lora(self): orig_image_slice = original_images[0, -3:, -3:, -1] # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1213,9 +1245,12 @@ def test_lora_fusion_is_not_affected_by_unloading(self): _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1252,9 +1287,12 @@ def test_fuse_lora_with_different_scales(self): _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1304,9 +1342,12 @@ def test_with_different_scales(self): original_imagee_slice = original_images[0, -3:, -3:, -1] # Emulate training. 
- set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1352,9 +1393,12 @@ def test_with_different_scales_fusion_equivalence(self): images_slice = images[0, -3:, -3:, -1] # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True, var=0.1) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True, var=0.1 + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True, var=0.1 + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1399,9 +1443,12 @@ def test_save_load_fused_lora_modules(self): _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) # Emulate training. - set_lora_weights(lora_components["unet_lora_params"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_lora_params"].parameters(), randn_weight=True, var=0.1) - set_lora_weights(lora_components["text_encoder_two_lora_params"].parameters(), randn_weight=True, var=0.1) + lora_components["text_encoder_lora_params"] = set_lora_weights( + lora_components["text_encoder_lora_params"], randn_weight=True, var=0.1 + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + lora_components["text_encoder_two_lora_params"], randn_weight=True, var=0.1 + ) with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( @@ -1482,7 +1529,7 @@ def test_lora_processors(self): with torch.no_grad(): sample1 = model(**inputs_dict).sample - lora_params = create_unet_lora_layers(model) + _, lora_params = create_unet_lora_layers(model) # make sure we can set a list of attention processors model.load_attn_procs(lora_params) @@ -1515,7 +1562,7 @@ def test_lora_on_off(self, expected_max_diff=1e-3): with torch.no_grad(): old_sample = model(**inputs_dict).sample - lora_params = create_unet_lora_layers(model) + _, lora_params = create_unet_lora_layers(model) model.load_attn_procs(lora_params) with torch.no_grad(): @@ -1545,7 +1592,7 @@ def test_lora_xformers_on_off(self, expected_max_diff=6e-4): torch.manual_seed(0) model = self.model_class(**init_dict) model.to(torch_device) - lora_params = create_unet_lora_layers(model) + _, lora_params = create_unet_lora_layers(model) model.load_attn_procs(lora_params) # default From 03bb29cc958ba20d80953d0f69ad11a6a9c4f100 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:24:50 +0530 Subject: [PATCH 30/44] fix --- tests/lora/test_lora_layers_old_backend.py | 121 ++++++--------------- 1 file changed, 32 insertions(+), 89 deletions(-) diff --git 
a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 6c6d143382c1..8750bfe306b5 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -228,7 +228,10 @@ def set_lora_weights(lora_attn_parameters, randn_weight=False, var=1.0): else: torch.zero_(parameter) else: - modified_state_dict = {k: torch.rand_like(v) * var for k, v in lora_attn_parameters.items()} + if randn_weight: + modified_state_dict = {k: torch.rand_like(v) * var for k, v in lora_attn_parameters.items()} + else: + modified_state_dict = {k: torch.zeros_like(v) * var for k, v in lora_attn_parameters.items()} return modified_state_dict @@ -295,7 +298,9 @@ def get_dummy_components(self): text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder) + text_encoder_lora_params = set_lora_weights( + text_encoder_lora_state_dict(text_encoder), randn_weight=True, var=0.1 + ) pipeline_components = { "unet": unet, @@ -469,13 +474,15 @@ def test_text_encoder_lora_monkey_patch(self): assert outputs_without_lora.shape == (1, 77, 32) # monkey patch - params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) - set_lora_weights(params, randn_weight=False) + text_encoder_lora_params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) + text_encoder_lora_params = set_lora_weights( + text_encoder_lora_state_dict(pipe.text_encoder), randn_weight=False + ) with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, unet_lora_layers=None, - text_encoder_lora_layers=params, + text_encoder_lora_layers=text_encoder_lora_params, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) pipe.load_lora_weights(tmpdirname) @@ -492,13 +499,15 @@ def test_text_encoder_lora_monkey_patch(self): pipeline_components, _ = self.get_dummy_components() pipe = StableDiffusionPipeline(**pipeline_components) - params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) - set_lora_weights(params, randn_weight=True) + text_encoder_lora_params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) + text_encoder_lora_params = set_lora_weights( + text_encoder_lora_state_dict(pipe.text_encoder), randn_weight=True, var=0.1 + ) with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, unet_lora_layers=None, - text_encoder_lora_layers=params, + text_encoder_lora_layers=text_encoder_lora_params, ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) pipe.load_lora_weights(tmpdirname) @@ -523,7 +532,7 @@ def test_text_encoder_lora_remove_monkey_patch(self): # monkey patch params = pipe._modify_text_encoder(pipe.text_encoder, pipe.lora_scale) - set_lora_weights(params, randn_weight=True) + params = set_lora_weights(text_encoder_lora_state_dict(pipe.text_encoder), var=0.1, randn_weight=True) with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, @@ -615,7 +624,9 @@ def test_unload_lora_sd(self): # Emulate training. 
lora_components["text_encoder_lora_layers"] = set_lora_weights( - lora_components["text_encoder_lora_layers"], randn_weight=True + lora_components["text_encoder_lora_layers"], + randn_weight=True, + var=0.1, ) with tempfile.TemporaryDirectory() as tmpdirname: @@ -714,7 +725,7 @@ def test_lora_save_load_with_xformers(self): @deprecate_after_peft_backend -class SDXInpaintLoraMixinTests(unittest.TestCase): +class SDInpaintLoraMixinTests(unittest.TestCase): lora_rank = 4 def get_dummy_inputs(self, device, seed=0, img_res=64, output_pil=True): @@ -791,10 +802,12 @@ def get_dummy_components(self): tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") unet_lora_raw_params, unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) - text_encoder_lora_params = LoraLoaderMixin._modify_text_encoder( + text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder) + text_encoder_lora_params = set_lora_weights( + text_encoder_lora_state_dict(text_encoder), randn_weight=True, var=0.1 + ) components = { "unet": unet, @@ -912,11 +925,15 @@ def get_dummy_components(self): text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_lora_params = text_encoder_lora_state_dict(text_encoder) + text_encoder_lora_params = set_lora_weights( + text_encoder_lora_state_dict(text_encoder), randn_weight=True, var=0.1 + ) text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder_2, dtype=torch.float32, rank=self.lora_rank ) - text_encoder_two_lora_params = text_encoder_lora_state_dict(text_encoder_2) + text_encoder_two_lora_params = set_lora_weights( + text_encoder_lora_state_dict(text_encoder_2), randn_weight=True, var=0.1 + ) pipeline_components = { "unet": unet, @@ -992,14 +1009,6 @@ def test_unload_lora_sdxl(self): original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1111,16 +1120,6 @@ def test_lora_fuse_nan(self): sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) - _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) - - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1160,14 +1159,6 @@ def test_lora_fusion(self): original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] - # Emulate training. 
- lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1196,14 +1187,6 @@ def test_unfuse_lora(self): original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1244,14 +1227,6 @@ def test_lora_fusion_is_not_affected_by_unloading(self): _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1286,14 +1261,6 @@ def test_fuse_lora_with_different_scales(self): _ = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1341,14 +1308,6 @@ def test_with_different_scales(self): original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images original_imagee_slice = original_images[0, -3:, -3:, -1] - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1392,14 +1351,6 @@ def test_with_different_scales_fusion_equivalence(self): images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images images_slice = images[0, -3:, -3:, -1] - # Emulate training. 
- lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True, var=0.1 - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True, var=0.1 - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, @@ -1442,14 +1393,6 @@ def test_save_load_fused_lora_modules(self): _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) - # Emulate training. - lora_components["text_encoder_lora_params"] = set_lora_weights( - lora_components["text_encoder_lora_params"], randn_weight=True, var=0.1 - ) - lora_components["text_encoder_two_lora_params"] = set_lora_weights( - lora_components["text_encoder_two_lora_params"], randn_weight=True, var=0.1 - ) - with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, From f49185edf731c7a28eb38538d8d239278715fd7f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:28:47 +0530 Subject: [PATCH 31/44] fix: test_unload_lora_sd --- tests/lora/test_lora_layers_old_backend.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 8750bfe306b5..d45722ee526f 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -622,13 +622,6 @@ def test_unload_lora_sd(self): original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] - # Emulate training. - lora_components["text_encoder_lora_layers"] = set_lora_weights( - lora_components["text_encoder_lora_layers"], - randn_weight=True, - var=0.1, - ) - with tempfile.TemporaryDirectory() as tmpdirname: LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, From b05585d02324974a01b2556fd36891606e3f31ee Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:29:16 +0530 Subject: [PATCH 32/44] fix: test_unload_lora_sd --- tests/lora/test_lora_layers_old_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index d45722ee526f..0ae054f8679b 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -626,7 +626,7 @@ def test_unload_lora_sd(self): LoraLoaderMixin.save_lora_weights( save_directory=tmpdirname, unet_lora_layers=lora_components["unet_lora_params"], - text_encoder_lora_layers=lora_components["text_encoder_lora_layers"], + text_encoder_lora_layers=lora_components["text_encoder_lora_params"], ) self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) sd_pipe.load_lora_weights(tmpdirname) From 451816bee63e61af0d736789887093a4c389152d Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:31:06 +0530 Subject: [PATCH 33/44] use dfault attebtion processors. 
--- tests/lora/test_lora_layers_old_backend.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 0ae054f8679b..8253752fabfe 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -618,6 +618,7 @@ def test_unload_lora_sd(self): pipeline_components, lora_components = self.get_dummy_components() _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) sd_pipe = StableDiffusionPipeline(**pipeline_components) + sd_pipe.unet.set_default_attn_processor() original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] @@ -998,6 +999,7 @@ def test_unload_lora_sdxl(self): pipeline_components, lora_components = self.get_dummy_components() _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) sd_pipe = StableDiffusionXLPipeline(**pipeline_components) + sd_pipe.unet.set_default_attn_processor() original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images orig_image_slice = original_images[0, -3:, -3:, -1] From dc085bf1b726a25b28456a46dd39c49d4db3f292 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:37:44 +0530 Subject: [PATCH 34/44] debu --- tests/lora/test_lora_layers_old_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 8253752fabfe..2ce176f6e07f 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1128,6 +1128,7 @@ def test_lora_fuse_nan(self): # corrupt one LoRA weight with `inf` values with torch.no_grad(): + print(sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight.data[0, :3]) sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight += float( "inf" ) From 7d9cbaa851fcd5e4bf7894e6cdaaecb8f36802af Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:38:22 +0530 Subject: [PATCH 35/44] debug nan --- tests/lora/test_lora_layers_old_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 2ce176f6e07f..5d6d14d8cc97 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1132,6 +1132,7 @@ def test_lora_fuse_nan(self): sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight += float( "inf" ) + print(sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight.data[0, :3]) # with `safe_fusing=True` we should see an Error with self.assertRaises(ValueError): From d15af62ad05a73f06e237202d82aae989e4be70f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:40:09 +0530 Subject: [PATCH 36/44] debug nan --- src/diffusers/models/lora.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index daac8f902cd6..bebcfaa9d24c 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -308,6 +308,7 @@ def _fuse_lora(self, lora_scale: float = 1.0, safe_fusing: bool = False): w_orig = self.weight.data.float() w_up = self.lora_layer.up.weight.data.float() w_down = self.lora_layer.down.weight.data.float() + print(w_down[0, :3], torch.isnan(w_down).any().item()) if 
self.lora_layer.network_alpha is not None: w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank From a7b1606d66df5d647bfde1563a05aaac852171ba Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:41:00 +0530 Subject: [PATCH 37/44] debug nan --- src/diffusers/models/lora.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index bebcfaa9d24c..fed78b075205 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -308,7 +308,6 @@ def _fuse_lora(self, lora_scale: float = 1.0, safe_fusing: bool = False): w_orig = self.weight.data.float() w_up = self.lora_layer.up.weight.data.float() w_down = self.lora_layer.down.weight.data.float() - print(w_down[0, :3], torch.isnan(w_down).any().item()) if self.lora_layer.network_alpha is not None: w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank @@ -388,6 +387,8 @@ def _fuse_lora(self, lora_scale: float = 1.0, safe_fusing: bool = False): w_up = self.lora_layer.up.weight.data.float() w_down = self.lora_layer.down.weight.data.float() + print(w_down[0, :3], torch.isnan(w_down).any().item()) + if self.lora_layer.network_alpha is not None: w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank From b507b403ebb0cc16934844efd09778353c143a36 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:44:37 +0530 Subject: [PATCH 38/44] use NaN instead of inf --- tests/lora/test_lora_layers_old_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 5d6d14d8cc97..b7c71a70cc64 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1130,7 +1130,7 @@ def test_lora_fuse_nan(self): with torch.no_grad(): print(sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight.data[0, :3]) sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight += float( - "inf" + "NaN" ) print(sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight.data[0, :3]) From 67df56fe8d4f0f3a5bfac8e0c2d17dcefd7738ac Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 11:45:13 +0530 Subject: [PATCH 39/44] remove comments. 
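For context, a minimal sketch of the behaviour those debug prints were chasing in `test_lora_fuse_nan` (assuming `sd_pipe` is an SDXL pipeline that already has the LoRA layers attached, as in the test):

```python
import torch

# Corrupt one LoRA down-projection with NaN, exactly as the test does.
with torch.no_grad():
    attn = sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1
    attn.to_q.lora_layer.down.weight += float("NaN")

# With safe_fusing=True, fusing is expected to detect the corrupted weight and raise.
try:
    sd_pipe.fuse_lora(safe_fusing=True)
except ValueError:
    print("NaN detected during fusion, as expected")

# Without the safety check, fusion proceeds and the NaN propagates into the UNet.
sd_pipe.fuse_lora(safe_fusing=False)
```

This is also why the previous commit switched the corruption from `inf` to `NaN`: the safe-fusing path appears to check the fused weight for NaNs, which an added `inf` does not reliably produce.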
--- src/diffusers/models/lora.py | 2 -- tests/lora/test_lora_layers_old_backend.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/diffusers/models/lora.py b/src/diffusers/models/lora.py index fed78b075205..daac8f902cd6 100644 --- a/src/diffusers/models/lora.py +++ b/src/diffusers/models/lora.py @@ -387,8 +387,6 @@ def _fuse_lora(self, lora_scale: float = 1.0, safe_fusing: bool = False): w_up = self.lora_layer.up.weight.data.float() w_down = self.lora_layer.down.weight.data.float() - print(w_down[0, :3], torch.isnan(w_down).any().item()) - if self.lora_layer.network_alpha is not None: w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index b7c71a70cc64..7b847c745a4b 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1128,11 +1128,9 @@ def test_lora_fuse_nan(self): # corrupt one LoRA weight with `inf` values with torch.no_grad(): - print(sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight.data[0, :3]) sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight += float( "NaN" ) - print(sd_pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_layer.down.weight.data[0, :3]) # with `safe_fusing=True` we should see an Error with self.assertRaises(ValueError): From ccda99293b9bff2380143a5d413688517efcd4d7 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 13:22:18 +0530 Subject: [PATCH 40/44] fix: test_text_encoder_lora_state_dict_unchanged --- tests/lora/test_lora_layers_old_backend.py | 47 +++++++++++++++------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 7b847c745a4b..3709a0e045ce 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -858,7 +858,7 @@ def test_stable_diffusion_inpaint_lora(self): class SDXLLoraLoaderMixinTests(unittest.TestCase): lora_rank = 4 - def get_dummy_components(self): + def get_dummy_components(self, modify_text_encoder=True): torch.manual_seed(0) unet = UNet2DConditionModel( block_out_channels=(32, 64), @@ -916,18 +916,23 @@ def get_dummy_components(self): tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") _, unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) - text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( - text_encoder, dtype=torch.float32, rank=self.lora_rank - ) - text_encoder_lora_params = set_lora_weights( - text_encoder_lora_state_dict(text_encoder), randn_weight=True, var=0.1 - ) - text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( - text_encoder_2, dtype=torch.float32, rank=self.lora_rank - ) - text_encoder_two_lora_params = set_lora_weights( - text_encoder_lora_state_dict(text_encoder_2), randn_weight=True, var=0.1 - ) + + if modify_text_encoder: + text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( + text_encoder, dtype=torch.float32, rank=self.lora_rank + ) + text_encoder_lora_params = set_lora_weights( + text_encoder_lora_state_dict(text_encoder), randn_weight=True, var=0.1 + ) + text_encoder_two_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( + text_encoder_2, dtype=torch.float32, rank=self.lora_rank + ) + text_encoder_two_lora_params 
= set_lora_weights( + text_encoder_lora_state_dict(text_encoder_2), randn_weight=True, var=0.1 + ) + else: + text_encoder_lora_params = None + text_encoder_two_lora_params = None pipeline_components = { "unet": unet, @@ -1052,7 +1057,7 @@ def test_load_lora_locally(self): sd_pipe.unload_lora_weights() def test_text_encoder_lora_state_dict_unchanged(self): - pipeline_components, lora_components = self.get_dummy_components() + pipeline_components, lora_components = self.get_dummy_components(modify_text_encoder=False) sd_pipe = StableDiffusionXLPipeline(**pipeline_components) text_encoder_1_sd_keys = sorted(sd_pipe.text_encoder.state_dict().keys()) @@ -1061,6 +1066,20 @@ def test_text_encoder_lora_state_dict_unchanged(self): sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) + # Modify the text encoder. + _ = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( + sd_pipe.text_encoder, dtype=torch.float32, rank=self.lora_rank + ) + lora_components["text_encoder_lora_params"] = set_lora_weights( + text_encoder_lora_state_dict(sd_pipe.text_encoder), randn_weight=True, var=0.1 + ) + _ = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( + sd_pipe.text_encoder_2, dtype=torch.float32, rank=self.lora_rank + ) + lora_components["text_encoder_two_lora_params"] = set_lora_weights( + text_encoder_lora_state_dict(sd_pipe.text_encoder_2), randn_weight=True, var=0.1 + ) + with tempfile.TemporaryDirectory() as tmpdirname: StableDiffusionXLPipeline.save_lora_weights( save_directory=tmpdirname, From bd995dfd76b6d88e3f26b708cc22cb57a97aff66 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 13:23:29 +0530 Subject: [PATCH 41/44] attention processor default --- tests/lora/test_lora_layers_old_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 3709a0e045ce..182bd718280a 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1195,6 +1195,7 @@ def test_unfuse_lora(self): sd_pipe = StableDiffusionXLPipeline(**pipeline_components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) + sd_pipe.unet.set_default_attn_processor() _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) From e80acf3cc8f1eb9a2e9243e3b32ff3b5afef67a0 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 13:26:23 +0530 Subject: [PATCH 42/44] default attention processors. 
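For readability, here is the contract of the `set_lora_weights` helper that the hunks above rely on: when handed an iterable of LoRA parameters it mutates them in place, but when handed a LoRA state dict it returns a new, randomized dict that must be reassigned. A hedged usage sketch of the test file's own helpers (`unet` and `text_encoder` stand for the dummy models built in `get_dummy_components`):

```python
# In-place branch: raw LoRA parameters are randomized (or zeroed) directly.
unet_lora_raw_params, unet_lora_params = create_unet_lora_layers(unet, rank=4)
set_lora_weights(unet_lora_raw_params, randn_weight=True, var=0.1)

# State-dict branch: a fresh dict of randomized tensors is returned, which is why the
# tests write the result back into `lora_components` instead of mutating in place.
text_encoder_lora_params = set_lora_weights(
    text_encoder_lora_state_dict(text_encoder), randn_weight=True, var=0.1
)
```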
--- tests/lora/test_lora_layers_old_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 182bd718280a..13c539dba936 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1318,6 +1318,7 @@ def test_with_different_scales(self): sd_pipe = StableDiffusionXLPipeline(**pipeline_components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) + sd_pipe.unet.set_default_attn_processor() _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) original_images = sd_pipe(**pipeline_inputs, generator=torch.manual_seed(0)).images From 046a8b3174d3ff77dfd488a7aaad647120e7d442 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 13:31:21 +0530 Subject: [PATCH 43/44] default --- tests/lora/test_lora_layers_old_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 13c539dba936..6b2822d6926e 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -1361,6 +1361,7 @@ def test_with_different_scales_fusion_equivalence(self): sd_pipe = StableDiffusionXLPipeline(**pipeline_components) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) + sd_pipe.unet.set_default_attn_processor() _, _, pipeline_inputs = self.get_dummy_inputs(with_generator=False) From 6f2dda30fa3bae6869d4f6f42a65d6e08d35053f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 28 Dec 2023 13:38:07 +0530 Subject: [PATCH 44/44] style --- tests/lora/test_lora_layers_old_backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py index 6b2822d6926e..7d6d30169455 100644 --- a/tests/lora/test_lora_layers_old_backend.py +++ b/tests/lora/test_lora_layers_old_backend.py @@ -916,7 +916,7 @@ def get_dummy_components(self, modify_text_encoder=True): tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") _, unet_lora_params = create_unet_lora_layers(unet, rank=self.lora_rank) - + if modify_text_encoder: text_encoder_lora_params = StableDiffusionXLLoraLoaderMixin._modify_text_encoder( text_encoder, dtype=torch.float32, rank=self.lora_rank @@ -931,7 +931,7 @@ def get_dummy_components(self, modify_text_encoder=True): text_encoder_lora_state_dict(text_encoder_2), randn_weight=True, var=0.1 ) else: - text_encoder_lora_params = None + text_encoder_lora_params = None text_encoder_two_lora_params = None pipeline_components = {