From f4881dd4da355129226a65e7f5898946403fd4fb Mon Sep 17 00:00:00 2001 From: William Berman Date: Wed, 6 Sep 2023 15:46:13 -0700 Subject: [PATCH] Revert "[Core] better support offloading when side loading is enabled. (#4855)" This reverts commit e4b8e7928b2c1972b37af67c64ccc67e42578f8c. --- src/diffusers/loaders.py | 43 -------------- .../pipeline_controlnet_inpaint_sd_xl.py | 26 --------- .../controlnet/pipeline_controlnet_sd_xl.py | 26 --------- .../pipeline_stable_diffusion_xl.py | 26 --------- .../pipeline_stable_diffusion_xl_img2img.py | 26 --------- .../pipeline_stable_diffusion_xl_inpaint.py | 26 --------- tests/models/test_lora_layers.py | 56 +------------------ .../stable_diffusion/test_stable_diffusion.py | 50 ----------------- 8 files changed, 1 insertion(+), 278 deletions(-) diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 1de899cad927..e4d8f1a3a5eb 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -45,7 +45,6 @@ if is_accelerate_available(): from accelerate import init_empty_weights - from accelerate.hooks import AlignDevicesHook, CpuOffload, remove_hook_from_module from accelerate.utils import set_module_tensor_to_device logger = logging.get_logger(__name__) @@ -779,21 +778,6 @@ def load_textual_inversion( f" `{self.load_textual_inversion.__name__}`" ) - # Remove any existing hooks. - is_model_cpu_offload = False - is_sequential_cpu_offload = False - recursive = False - for _, component in self.components.items(): - if isinstance(component, nn.Module): - if hasattr(component, "_hf_hook"): - is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) - is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) - logger.info( - "Accelerate hooks detected. Since you have called `load_textual_inversion()`, the previous hooks will be first removed. Then the textual inversion parameters will be loaded and the hooks will be applied again." - ) - recursive = is_sequential_cpu_offload - remove_hook_from_module(component, recurse=recursive) - cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) force_download = kwargs.pop("force_download", False) resume_download = kwargs.pop("resume_download", False) @@ -947,12 +931,6 @@ def load_textual_inversion( for token_id, embedding in token_ids_and_embeddings: text_encoder.get_input_embeddings().weight.data[token_id] = embedding - # offload back - if is_model_cpu_offload: - self.enable_model_cpu_offload() - elif is_sequential_cpu_offload: - self.enable_sequential_cpu_offload() - class LoraLoaderMixin: r""" @@ -984,21 +962,6 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di kwargs (`dict`, *optional*): See [`~loaders.LoraLoaderMixin.lora_state_dict`]. """ - # Remove any existing hooks. - is_model_cpu_offload = False - is_sequential_cpu_offload = False - recurive = False - for _, component in self.components.items(): - if isinstance(component, nn.Module): - if hasattr(component, "_hf_hook"): - is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) - is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) - logger.info( - "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again." - ) - recurive = is_sequential_cpu_offload - remove_hook_from_module(component, recurse=recurive) - state_dict, network_alphas = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) self.load_lora_into_unet(state_dict, network_alphas=network_alphas, unet=self.unet) self.load_lora_into_text_encoder( @@ -1008,12 +971,6 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di lora_scale=self.lora_scale, ) - # Offload back. - if is_model_cpu_offload: - self.enable_model_cpu_offload() - elif is_sequential_cpu_offload: - self.enable_sequential_cpu_offload() - @classmethod def lora_state_dict( cls, diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index c64204501b97..b20d1f0c636e 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -1549,26 +1549,6 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di # We could have accessed the unet config from `lora_state_dict()` too. We pass # it here explicitly to be able to tell that it's coming from an SDXL # pipeline. - - # Remove any existing hooks. - if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): - from accelerate.hooks import AlignDevicesHook, CpuOffload, remove_hook_from_module - else: - raise ImportError("Offloading requires `accelerate v0.17.0` or higher.") - - is_model_cpu_offload = False - is_sequential_cpu_offload = False - recursive = False - for _, component in self.components.items(): - if isinstance(component, torch.nn.Module): - if hasattr(component, "_hf_hook"): - is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) - is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) - logger.info( - "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again." - ) - recursive = is_sequential_cpu_offload - remove_hook_from_module(component, recurse=recursive) state_dict, network_alphas = self.lora_state_dict( pretrained_model_name_or_path_or_dict, unet_config=self.unet.config, @@ -1596,12 +1576,6 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di lora_scale=self.lora_scale, ) - # Offload back. - if is_model_cpu_offload: - self.enable_model_cpu_offload() - elif is_sequential_cpu_offload: - self.enable_sequential_cpu_offload() - @classmethod # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.save_lora_weights def save_lora_weights( diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index ef6b54e81548..6f2b36ba6976 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -1216,26 +1216,6 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di # We could have accessed the unet config from `lora_state_dict()` too. We pass # it here explicitly to be able to tell that it's coming from an SDXL # pipeline. - - # Remove any existing hooks. - if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): - from accelerate.hooks import AlignDevicesHook, CpuOffload, remove_hook_from_module - else: - raise ImportError("Offloading requires `accelerate v0.17.0` or higher.") - - is_model_cpu_offload = False - is_sequential_cpu_offload = False - recursive = False - for _, component in self.components.items(): - if isinstance(component, torch.nn.Module): - if hasattr(component, "_hf_hook"): - is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) - is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) - logger.info( - "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again." - ) - recursive = is_sequential_cpu_offload - remove_hook_from_module(component, recurse=recursive) state_dict, network_alphas = self.lora_state_dict( pretrained_model_name_or_path_or_dict, unet_config=self.unet.config, @@ -1263,12 +1243,6 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di lora_scale=self.lora_scale, ) - # Offload back. - if is_model_cpu_offload: - self.enable_model_cpu_offload() - elif is_sequential_cpu_offload: - self.enable_sequential_cpu_offload() - @classmethod # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.save_lora_weights def save_lora_weights( diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index 7b7755085ed6..459b47de7ea1 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -922,26 +922,6 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di # We could have accessed the unet config from `lora_state_dict()` too. We pass # it here explicitly to be able to tell that it's coming from an SDXL # pipeline. - - # Remove any existing hooks. - if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): - from accelerate.hooks import AlignDevicesHook, CpuOffload, remove_hook_from_module - else: - raise ImportError("Offloading requires `accelerate v0.17.0` or higher.") - - is_model_cpu_offload = False - is_sequential_cpu_offload = False - recursive = False - for _, component in self.components.items(): - if isinstance(component, torch.nn.Module): - if hasattr(component, "_hf_hook"): - is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) - is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) - logger.info( - "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again." - ) - recursive = is_sequential_cpu_offload - remove_hook_from_module(component, recurse=recursive) state_dict, network_alphas = self.lora_state_dict( pretrained_model_name_or_path_or_dict, unet_config=self.unet.config, @@ -969,12 +949,6 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di lora_scale=self.lora_scale, ) - # Offload back. - if is_model_cpu_offload: - self.enable_model_cpu_offload() - elif is_sequential_cpu_offload: - self.enable_sequential_cpu_offload() - @classmethod def save_lora_weights( self, diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index 04902234d54e..b9e2b263b893 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -1072,26 +1072,6 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di # We could have accessed the unet config from `lora_state_dict()` too. We pass # it here explicitly to be able to tell that it's coming from an SDXL # pipeline. - - # Remove any existing hooks. - if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): - from accelerate.hooks import AlignDevicesHook, CpuOffload, remove_hook_from_module - else: - raise ImportError("Offloading requires `accelerate v0.17.0` or higher.") - - is_model_cpu_offload = False - is_sequential_cpu_offload = False - recursive = False - for _, component in self.components.items(): - if isinstance(component, torch.nn.Module): - if hasattr(component, "_hf_hook"): - is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) - is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) - logger.info( - "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again." - ) - recursive = is_sequential_cpu_offload - remove_hook_from_module(component, recurse=recursive) state_dict, network_alphas = self.lora_state_dict( pretrained_model_name_or_path_or_dict, unet_config=self.unet.config, @@ -1119,12 +1099,6 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di lora_scale=self.lora_scale, ) - # Offload back. - if is_model_cpu_offload: - self.enable_model_cpu_offload() - elif is_sequential_cpu_offload: - self.enable_sequential_cpu_offload() - @classmethod # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.save_lora_weights def save_lora_weights( diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index 1d86dff702ef..0b00e31a0a50 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -1392,26 +1392,6 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di # We could have accessed the unet config from `lora_state_dict()` too. We pass # it here explicitly to be able to tell that it's coming from an SDXL # pipeline. - - # Remove any existing hooks. - if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): - from accelerate.hooks import AlignDevicesHook, CpuOffload, remove_hook_from_module - else: - raise ImportError("Offloading requires `accelerate v0.17.0` or higher.") - - is_model_cpu_offload = False - is_sequential_cpu_offload = False - recursive = False - for _, component in self.components.items(): - if isinstance(component, torch.nn.Module): - if hasattr(component, "_hf_hook"): - is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) - is_sequential_cpu_offload = isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) - logger.info( - "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again." - ) - recursive = is_sequential_cpu_offload - remove_hook_from_module(component, recurse=recursive) state_dict, network_alphas = self.lora_state_dict( pretrained_model_name_or_path_or_dict, unet_config=self.unet.config, @@ -1439,12 +1419,6 @@ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Di lora_scale=self.lora_scale, ) - # Offload back. - if is_model_cpu_offload: - self.enable_model_cpu_offload() - elif is_sequential_cpu_offload: - self.enable_sequential_cpu_offload() - @classmethod # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.save_lora_weights def save_lora_weights( diff --git a/tests/models/test_lora_layers.py b/tests/models/test_lora_layers.py index c49ea7f2d960..6b3498ae5f41 100644 --- a/tests/models/test_lora_layers.py +++ b/tests/models/test_lora_layers.py @@ -1081,42 +1081,6 @@ def test_a1111(self): self.assertTrue(np.allclose(images, expected, atol=1e-3)) - def test_a1111_with_model_cpu_offload(self): - generator = torch.Generator().manual_seed(0) - - pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None) - pipe.enable_model_cpu_offload() - lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" - lora_filename = "light_and_shadow.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - - def test_a1111_with_sequential_cpu_offload(self): - generator = torch.Generator().manual_seed(0) - - pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None) - pipe.enable_sequential_cpu_offload() - lora_model_id = "hf-internal-testing/civitai-light-shadow-lora" - lora_filename = "light_and_shadow.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) - def test_kohya_sd_v15_with_higher_dimensions(self): generator = torch.Generator().manual_seed(0) @@ -1293,10 +1257,10 @@ def test_sdxl_1_0_lora(self): generator = torch.Generator().manual_seed(0) pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") - pipe.enable_model_cpu_offload() lora_model_id = "hf-internal-testing/sdxl-1.0-lora" lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) + pipe.enable_model_cpu_offload() images = pipe( "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 @@ -1447,21 +1411,3 @@ def test_sdxl_1_0_fuse_unfuse_all(self): assert state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict()) assert state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict()) assert state_dicts_almost_equal(unet_sd, pipe.unet.state_dict()) - - def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self): - generator = torch.Generator().manual_seed(0) - - pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") - pipe.enable_sequential_cpu_offload() - lora_model_id = "hf-internal-testing/sdxl-1.0-lora" - lora_filename = "sd_xl_offset_example-lora_1.0.safetensors" - pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) - - images = pipe( - "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2 - ).images - - images = images[0, -3:, -3:, -1].flatten() - expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535]) - - self.assertTrue(np.allclose(images, expected, atol=1e-3)) diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py index 31de557a0ac3..7935a63eceaa 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py @@ -1019,56 +1019,6 @@ def test_stable_diffusion_textual_inversion(self): max_diff = np.abs(expected_image - image).max() assert max_diff < 8e-1 - def test_stable_diffusion_textual_inversion_with_model_cpu_offload(self): - pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4") - pipe.enable_model_cpu_offload() - pipe.load_textual_inversion("sd-concepts-library/low-poly-hd-logos-icons") - - a111_file = hf_hub_download("hf-internal-testing/text_inv_embedding_a1111_format", "winter_style.pt") - a111_file_neg = hf_hub_download( - "hf-internal-testing/text_inv_embedding_a1111_format", "winter_style_negative.pt" - ) - pipe.load_textual_inversion(a111_file) - pipe.load_textual_inversion(a111_file_neg) - - generator = torch.Generator(device="cpu").manual_seed(1) - - prompt = "An logo of a turtle in strong Style-Winter with " - neg_prompt = "Style-Winter-neg" - - image = pipe(prompt=prompt, negative_prompt=neg_prompt, generator=generator, output_type="np").images[0] - expected_image = load_numpy( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text_inv/winter_logo_style.npy" - ) - - max_diff = np.abs(expected_image - image).max() - assert max_diff < 8e-1 - - def test_stable_diffusion_textual_inversion_with_sequential_cpu_offload(self): - pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4") - pipe.enable_sequential_cpu_offload() - pipe.load_textual_inversion("sd-concepts-library/low-poly-hd-logos-icons") - - a111_file = hf_hub_download("hf-internal-testing/text_inv_embedding_a1111_format", "winter_style.pt") - a111_file_neg = hf_hub_download( - "hf-internal-testing/text_inv_embedding_a1111_format", "winter_style_negative.pt" - ) - pipe.load_textual_inversion(a111_file) - pipe.load_textual_inversion(a111_file_neg) - - generator = torch.Generator(device="cpu").manual_seed(1) - - prompt = "An logo of a turtle in strong Style-Winter with " - neg_prompt = "Style-Winter-neg" - - image = pipe(prompt=prompt, negative_prompt=neg_prompt, generator=generator, output_type="np").images[0] - expected_image = load_numpy( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text_inv/winter_logo_style.npy" - ) - - max_diff = np.abs(expected_image - image).max() - assert max_diff < 8e-1 - @require_torch_2 def test_stable_diffusion_compile(self): seed = 0