From 4d6f053229ec9219398d5f864046cdb14482b03e Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 7 Jul 2023 13:06:52 +0300
Subject: [PATCH 1/2] Add gpu option to cpu offload

---
 examples/community/composable_stable_diffusion.py | 4 ++--
 examples/community/lpw_stable_diffusion.py | 8 ++++----
 examples/community/sd_text2img_k_diffusion.py | 4 ++--
 examples/community/stable_diffusion_controlnet_img2img.py | 8 ++++----
 examples/community/stable_diffusion_controlnet_inpaint.py | 8 ++++----
 .../stable_diffusion_controlnet_inpaint_img2img.py | 8 ++++----
 examples/community/stable_diffusion_ipex.py | 8 ++++----
 examples/community/stable_diffusion_repaint.py | 6 +++---
 examples/community/unclip_image_interpolation.py | 4 ++--
 examples/community/unclip_text_interpolation.py | 4 ++--
 .../pipelines/alt_diffusion/pipeline_alt_diffusion.py | 8 ++++----
 .../alt_diffusion/pipeline_alt_diffusion_img2img.py | 8 ++++----
 src/diffusers/pipelines/audioldm/pipeline_audioldm.py | 4 ++--
 .../consistency_models/pipeline_consistency_models.py | 8 ++++----
 src/diffusers/pipelines/controlnet/pipeline_controlnet.py | 8 ++++----
 .../pipelines/controlnet/pipeline_controlnet_img2img.py | 8 ++++----
 .../pipelines/controlnet/pipeline_controlnet_inpaint.py | 8 ++++----
 src/diffusers/pipelines/deepfloyd_if/pipeline_if.py | 8 ++++----
 .../pipelines/deepfloyd_if/pipeline_if_img2img.py | 8 ++++----
 .../deepfloyd_if/pipeline_if_img2img_superresolution.py | 8 ++++----
 .../pipelines/deepfloyd_if/pipeline_if_inpainting.py | 8 ++++----
 .../pipeline_if_inpainting_superresolution.py | 8 ++++----
 .../pipelines/deepfloyd_if/pipeline_if_superresolution.py | 8 ++++----
 src/diffusers/pipelines/kandinsky/pipeline_kandinsky.py | 8 ++++----
 .../pipelines/kandinsky/pipeline_kandinsky_img2img.py | 8 ++++----
 .../pipelines/kandinsky/pipeline_kandinsky_inpaint.py | 8 ++++----
 .../pipelines/kandinsky/pipeline_kandinsky_prior.py | 4 ++--
 .../pipelines/kandinsky2_2/pipeline_kandinsky2_2.py | 8 ++++----
 .../kandinsky2_2/pipeline_kandinsky2_2_controlnet.py | 8 ++++----
 .../pipeline_kandinsky2_2_controlnet_img2img.py | 8 ++++----
 .../kandinsky2_2/pipeline_kandinsky2_2_img2img.py | 8 ++++----
 .../kandinsky2_2/pipeline_kandinsky2_2_inpainting.py | 8 ++++----
 .../pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py | 4 ++--
 .../kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py | 4 ++--
 .../paint_by_example/pipeline_paint_by_example.py | 4 ++--
 src/diffusers/pipelines/shap_e/pipeline_shap_e.py | 8 ++++----
 src/diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py | 4 ++--
 .../stable_diffusion/pipeline_cycle_diffusion.py | 8 ++++----
 .../stable_diffusion/pipeline_stable_diffusion.py | 8 ++++----
 .../pipeline_stable_diffusion_attend_and_excite.py | 4 ++--
 .../pipeline_stable_diffusion_depth2img.py | 4 ++--
 .../pipeline_stable_diffusion_diffedit.py | 8 ++++----
 .../pipeline_stable_diffusion_image_variation.py | 4 ++--
 .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 8 ++++----
 .../stable_diffusion/pipeline_stable_diffusion_inpaint.py | 8 ++++----
 .../pipeline_stable_diffusion_inpaint_legacy.py | 8 ++++----
 .../pipeline_stable_diffusion_instruct_pix2pix.py | 8 ++++----
 .../pipeline_stable_diffusion_k_diffusion.py | 8 ++++----
 .../pipeline_stable_diffusion_latent_upscale.py | 4 ++--
 .../stable_diffusion/pipeline_stable_diffusion_ldm3d.py | 8 ++++----
 .../pipeline_stable_diffusion_model_editing.py | 4 ++--
 .../pipeline_stable_diffusion_panorama.py | 4 ++--
 .../pipeline_stable_diffusion_paradigms.py | 8 ++++----
.../pipeline_stable_diffusion_pix2pix_zero.py | 8 ++++---- .../stable_diffusion/pipeline_stable_diffusion_sag.py | 4 ++-- .../stable_diffusion/pipeline_stable_diffusion_upscale.py | 8 ++++---- .../pipelines/stable_diffusion/pipeline_stable_unclip.py | 8 ++++---- .../stable_diffusion/pipeline_stable_unclip_img2img.py | 8 ++++---- .../stable_diffusion_xl/pipeline_stable_diffusion_xl.py | 8 ++++---- .../pipeline_stable_diffusion_xl_img2img.py | 8 ++++---- .../pipeline_text_to_video_synth.py | 8 ++++---- .../pipeline_text_to_video_synth_img2img.py | 8 ++++---- src/diffusers/pipelines/unclip/pipeline_unclip.py | 4 ++-- .../pipelines/unclip/pipeline_unclip_image_variation.py | 4 ++-- .../pipelines/unidiffuser/pipeline_unidiffuser.py | 8 ++++---- .../pipeline_versatile_diffusion_dual_guided.py | 4 ++-- .../pipeline_versatile_diffusion_image_variation.py | 4 ++-- .../pipeline_versatile_diffusion_text_to_image.py | 4 ++-- 68 files changed, 227 insertions(+), 227 deletions(-) diff --git a/examples/community/composable_stable_diffusion.py b/examples/community/composable_stable_diffusion.py index 95292f5bdae8..c8efd214eee6 100644 --- a/examples/community/composable_stable_diffusion.py +++ b/examples/community/composable_stable_diffusion.py @@ -179,7 +179,7 @@ def disable_vae_slicing(self): """ self.vae.disable_slicing() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -190,7 +190,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]: if cpu_offloaded_model is not None: diff --git a/examples/community/lpw_stable_diffusion.py b/examples/community/lpw_stable_diffusion.py index 2970aae4b169..3a68d5faa75e 100644 --- a/examples/community/lpw_stable_diffusion.py +++ b/examples/community/lpw_stable_diffusion.py @@ -567,7 +567,7 @@ def disable_vae_tiling(self): self.vae.disable_tiling() # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -580,7 +580,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -593,7 +593,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -605,7 +605,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/examples/community/sd_text2img_k_diffusion.py b/examples/community/sd_text2img_k_diffusion.py index b7fbc46b67cb..09fe1d03d1ad 100755 --- a/examples/community/sd_text2img_k_diffusion.py +++ b/examples/community/sd_text2img_k_diffusion.py @@ -146,7 +146,7 @@ def disable_attention_slicing(self): # set slice_size = `None` to disable `attention slicing` self.enable_attention_slicing(None) - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -157,7 +157,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.safety_checker]: if cpu_offloaded_model is not None: diff --git a/examples/community/stable_diffusion_controlnet_img2img.py b/examples/community/stable_diffusion_controlnet_img2img.py index a8a51b5489a3..c44d156dd830 100644 --- a/examples/community/stable_diffusion_controlnet_img2img.py +++ b/examples/community/stable_diffusion_controlnet_img2img.py @@ -199,7 +199,7 @@ def disable_vae_slicing(self): """ self.vae.disable_slicing() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, unet, text_encoder, vae, controlnet, and safety checker have their state dicts saved to CPU and then are moved to a @@ -212,7 +212,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.controlnet]: cpu_offload(cpu_offloaded_model, device) @@ -220,7 +220,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -232,7 +232,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") hook = None for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: diff --git a/examples/community/stable_diffusion_controlnet_inpaint.py b/examples/community/stable_diffusion_controlnet_inpaint.py index aae199f91b9e..16a3aade0b93 100644 --- a/examples/community/stable_diffusion_controlnet_inpaint.py +++ b/examples/community/stable_diffusion_controlnet_inpaint.py @@ -298,7 +298,7 @@ def disable_vae_slicing(self): """ self.vae.disable_slicing() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae, controlnet, and safety checker have their state dicts saved to CPU and then are moved to a @@ -311,7 +311,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.controlnet]: cpu_offload(cpu_offloaded_model, device) @@ -319,7 +319,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -331,7 +331,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") hook = None for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: diff --git a/examples/community/stable_diffusion_controlnet_inpaint_img2img.py b/examples/community/stable_diffusion_controlnet_inpaint_img2img.py index bad1df0e13fb..7a4a1d33deb1 100644 --- a/examples/community/stable_diffusion_controlnet_inpaint_img2img.py +++ b/examples/community/stable_diffusion_controlnet_inpaint_img2img.py @@ -283,7 +283,7 @@ def disable_vae_slicing(self): """ self.vae.disable_slicing() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae, controlnet, and safety checker have their state dicts saved to CPU and then are moved to a @@ -296,7 +296,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.controlnet]: cpu_offload(cpu_offloaded_model, device) @@ -304,7 +304,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -316,7 +316,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") hook = None for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: diff --git a/examples/community/stable_diffusion_ipex.py b/examples/community/stable_diffusion_ipex.py index 9abe16d56f10..a3c1f5ff58bf 100644 --- a/examples/community/stable_diffusion_ipex.py +++ b/examples/community/stable_diffusion_ipex.py @@ -341,7 +341,7 @@ def disable_vae_tiling(self): """ self.vae.disable_tiling() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -354,7 +354,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -366,7 +366,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -378,7 +378,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/examples/community/stable_diffusion_repaint.py b/examples/community/stable_diffusion_repaint.py index 3fd63d4b213a..c34fed237174 100644 --- a/examples/community/stable_diffusion_repaint.py +++ b/examples/community/stable_diffusion_repaint.py @@ -289,7 +289,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -302,7 +302,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -314,7 +314,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/examples/community/unclip_image_interpolation.py b/examples/community/unclip_image_interpolation.py index 618ac25bdc95..a3dff7b1f81b 100644 --- a/examples/community/unclip_image_interpolation.py +++ b/examples/community/unclip_image_interpolation.py @@ -204,7 +204,7 @@ def _encode_image(self, image, device, num_images_per_prompt, image_embeddings: return image_embeddings # Copied from diffusers.pipelines.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -215,7 +215,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.decoder, diff --git a/examples/community/unclip_text_interpolation.py b/examples/community/unclip_text_interpolation.py index 290f45317004..41d0e5785966 100644 --- a/examples/community/unclip_text_interpolation.py +++ b/examples/community/unclip_text_interpolation.py @@ -212,7 +212,7 @@ def _encode_prompt( return prompt_embeds, text_encoder_hidden_states, text_mask # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -223,7 +223,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") # TODO: self.prior.post_process_latents is not covered by the offload hooks, so it fails if added to the list models = [ diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py index 5a4746d24e06..390009f6c523 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py @@ -226,7 +226,7 @@ def disable_vae_tiling(self): """ self.vae.disable_tiling() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -239,7 +239,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -251,7 +251,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -263,7 +263,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index 21c1f0591a44..9e527e1005c1 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -224,7 +224,7 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -237,7 +237,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -249,7 +249,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -261,7 +261,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py index c80e6ef99288..a1f9882cbbb9 100644 --- a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py +++ b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py @@ -108,7 +108,7 @@ def disable_vae_slicing(self): """ self.vae.disable_slicing() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, unet, text_encoder, vae and vocoder have their state dicts saved to CPU and then are moved to a `torch.device('meta') @@ -119,7 +119,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.vocoder]: cpu_offload(cpu_offloaded_model, device) diff --git a/src/diffusers/pipelines/consistency_models/pipeline_consistency_models.py b/src/diffusers/pipelines/consistency_models/pipeline_consistency_models.py index 4e72e3fdbafe..94fa8ddd71f5 100644 --- a/src/diffusers/pipelines/consistency_models/pipeline_consistency_models.py +++ b/src/diffusers/pipelines/consistency_models/pipeline_consistency_models.py @@ -76,7 +76,7 @@ def __init__(self, unet: UNet2DModel, scheduler: CMStochasticIterativeScheduler) self.safety_checker = None - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -89,7 +89,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -101,7 +101,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -113,7 +113,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index c266e8b20e74..848c56e921bf 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -212,7 +212,7 @@ def disable_vae_tiling(self): """ self.vae.disable_tiling() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, unet, text_encoder, vae, controlnet, and safety checker have their state dicts saved to CPU and then are moved to a @@ -225,7 +225,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.controlnet]: cpu_offload(cpu_offloaded_model, device) @@ -233,7 +233,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -245,7 +245,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") hook = None for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index fd013c4974f1..0aeca5e203a0 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -238,7 +238,7 @@ def disable_vae_tiling(self): """ self.vae.disable_tiling() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae, controlnet, and safety checker have their state dicts saved to CPU and then are moved to a @@ -251,7 +251,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.controlnet]: cpu_offload(cpu_offloaded_model, device) @@ -259,7 +259,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -271,7 +271,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") hook = None for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index 7de3f1dd9d88..d7156e486cf5 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -355,7 +355,7 @@ def disable_vae_tiling(self): """ self.vae.disable_tiling() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae, controlnet, and safety checker have their state dicts saved to CPU and then are moved to a @@ -368,7 +368,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.controlnet]: cpu_offload(cpu_offloaded_model, device) @@ -376,7 +376,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -388,7 +388,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") hook = None for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: diff --git a/src/diffusers/pipelines/deepfloyd_if/pipeline_if.py b/src/diffusers/pipelines/deepfloyd_if/pipeline_if.py index cd1015dc03bb..d942fc9a20c7 100644 --- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if.py +++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if.py @@ -144,7 +144,7 @@ def __init__( ) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -155,7 +155,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.text_encoder, @@ -168,7 +168,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -180,7 +180,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py index 6bae2071173b..0aad3c55f2d5 100644 --- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py @@ -169,7 +169,7 @@ def __init__( self.register_to_config(requires_safety_checker=requires_safety_checker) # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -180,7 +180,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.text_encoder, @@ -194,7 +194,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -206,7 +206,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py index 0ee9c6ba331d..630ae2742931 100644 --- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py @@ -180,7 +180,7 @@ def __init__( self.register_to_config(requires_safety_checker=requires_safety_checker) # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -191,7 +191,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.text_encoder, @@ -205,7 +205,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -217,7 +217,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py index 9c1f71126ac5..0a4d370cc03d 100644 --- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py @@ -172,7 +172,7 @@ def __init__( self.register_to_config(requires_safety_checker=requires_safety_checker) # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -183,7 +183,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.text_encoder, @@ -197,7 +197,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -209,7 +209,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py index 6a90f2b765d4..6f42bce4838a 100644 --- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py @@ -182,7 +182,7 @@ def __init__( self.register_to_config(requires_safety_checker=requires_safety_checker) # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -193,7 +193,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.text_encoder, @@ -207,7 +207,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -219,7 +219,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py index 86d9574b97e1..25fd556bddcc 100644 --- a/src/diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +++ b/src/diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py @@ -138,7 +138,7 @@ def __init__( self.register_to_config(requires_safety_checker=requires_safety_checker) # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -149,7 +149,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.text_encoder, @@ -163,7 +163,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -175,7 +175,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky.py b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky.py index 489f59b3ad9a..28c459060b32 100644 --- a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky.py +++ b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky.py @@ -229,7 +229,7 @@ def _encode_prompt( return prompt_embeds, text_encoder_hidden_states, text_mask - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -240,7 +240,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.unet, @@ -251,7 +251,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -263,7 +263,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py index f32528617e5a..2672d15702fb 100644 --- a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +++ b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py @@ -264,7 +264,7 @@ def _encode_prompt( return prompt_embeds, text_encoder_hidden_states, text_mask - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -275,7 +275,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.unet, @@ -286,7 +286,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -298,7 +298,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py index 75e58386cff0..486c0ef98180 100644 --- a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +++ b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py @@ -389,7 +389,7 @@ def _encode_prompt( return prompt_embeds, text_encoder_hidden_states, text_mask - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -400,7 +400,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.unet, @@ -411,7 +411,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -423,7 +423,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py index a35f406e0a05..922e9db2f980 100644 --- a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +++ b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py @@ -295,7 +295,7 @@ def get_zero_embed(self, batch_size=1, device=None): zero_image_emb = zero_image_emb.repeat(batch_size, 1) return zero_image_emb - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -306,7 +306,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.image_encoder, diff --git a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py index 4a116e1e600b..6fe5cbce7cbb 100644 --- a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +++ b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py @@ -110,7 +110,7 @@ def prepare_latents(self, shape, dtype, device, generator, latents, scheduler): latents = latents * scheduler.init_noise_sigma return latents - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -121,7 +121,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.unet, @@ -131,7 +131,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -143,7 +143,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py index 73fc20b5e0f2..10848a972317 100644 --- a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +++ b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py @@ -151,7 +151,7 @@ def prepare_latents(self, shape, dtype, device, generator, latents, scheduler): return latents # Copied from diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2.KandinskyV22Pipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -162,7 +162,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.unet, @@ -173,7 +173,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(cpu_offloaded_model, device) # Copied from diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2.KandinskyV22Pipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -185,7 +185,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py index 3e001e89e490..334aac07fd92 100644 --- a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +++ b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py @@ -206,7 +206,7 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt return latents # Copied from diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2.KandinskyV22Pipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -217,7 +217,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.unet, @@ -228,7 +228,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(cpu_offloaded_model, device) # Copied from diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2.KandinskyV22Pipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -240,7 +240,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py index 0a5f77b0ff2d..a53aceae7873 100644 --- a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +++ b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py @@ -179,7 +179,7 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt return latents # Copied from diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2.KandinskyV22Pipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -190,7 +190,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.unet, @@ -201,7 +201,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(cpu_offloaded_model, device) # Copied from diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2.KandinskyV22Pipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -213,7 +213,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py index 151312979f81..c9c85a2cdea8 100644 --- a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +++ b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py @@ -278,7 +278,7 @@ def prepare_latents(self, shape, dtype, device, generator, latents, scheduler): return latents # Copied from diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2.KandinskyV22Pipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -289,7 +289,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.unet, @@ -300,7 +300,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(cpu_offloaded_model, device) # Copied from diffusers.pipelines.kandinsky2_2.pipeline_kandinsky2_2.KandinskyV22Pipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -312,7 +312,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py index 3b9974a5dd70..397a90762062 100644 --- a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +++ b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py @@ -255,7 +255,7 @@ def get_zero_embed(self, batch_size=1, device=None): return zero_image_emb # Copied from diffusers.pipelines.kandinsky.pipeline_kandinsky_prior.KandinskyPriorPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -266,7 +266,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.image_encoder, diff --git a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py index ae46af9c4551..7d091c110e1e 100644 --- a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +++ b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py @@ -294,7 +294,7 @@ def get_zero_embed(self, batch_size=1, device=None): return zero_image_emb # Copied from diffusers.pipelines.kandinsky.pipeline_kandinsky_prior.KandinskyPriorPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -305,7 +305,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [ self.image_encoder, diff --git a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py index c8f3e8a9ee11..39e8036cfa83 100644 --- a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +++ b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py @@ -189,7 +189,7 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -200,7 +200,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.vae, self.image_encoder]: cpu_offload(cpu_offloaded_model, execution_device=device) diff --git a/src/diffusers/pipelines/shap_e/pipeline_shap_e.py b/src/diffusers/pipelines/shap_e/pipeline_shap_e.py index 5d96fc7bb9f4..7f19cfe766c0 100644 --- a/src/diffusers/pipelines/shap_e/pipeline_shap_e.py +++ b/src/diffusers/pipelines/shap_e/pipeline_shap_e.py @@ -130,7 +130,7 @@ def prepare_latents(self, shape, dtype, device, generator, latents, scheduler): latents = latents * scheduler.init_noise_sigma return latents - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -141,14 +141,14 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [self.text_encoder, self.prior] for cpu_offloaded_model in models: if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -160,7 +160,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py b/src/diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py index b99b808e5953..cfd250a7e6e6 100644 --- a/src/diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +++ b/src/diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py @@ -130,7 +130,7 @@ def prepare_latents(self, shape, dtype, device, generator, latents, scheduler): latents = latents * scheduler.init_noise_sigma return latents - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -141,7 +141,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") models = [self.image_encoder, self.prior] for cpu_offloaded_model in models: diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index b8360f512405..9e6341e3bfd0 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -232,7 +232,7 @@ def __init__( self.register_to_config(requires_safety_checker=requires_safety_checker) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -245,7 +245,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -258,7 +258,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -270,7 +270,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 9ad4d404fdbe..8d116d718a43 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -228,7 +228,7 @@ def disable_vae_tiling(self): """ self.vae.disable_tiling() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -241,7 +241,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -253,7 +253,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -265,7 +265,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py index f76268463707..ae3b0ba77c12 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py @@ -252,7 +252,7 @@ def disable_vae_slicing(self): self.vae.disable_slicing() # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
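With the change applied to StableDiffusionPipeline above, a caller selects a non-default accelerator purely through the new arguments; omitting them keeps the previous behaviour, since gpu defaults to "cuda" and gpu_id to 0. A minimal usage sketch (the model id and the "xpu" backend are placeholders for illustration, and whether a given backend works end to end still depends on accelerate's support for it):

    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    )

    # Fine-grained, lowest-memory variant: submodules are streamed to the device one by one.
    pipe.enable_sequential_cpu_offload(gpu="xpu", gpu_id=0)

    # Coarser variant: one whole model at a time is moved to the device.
    # pipe.enable_model_cpu_offload(gpu="xpu", gpu_id=0)

    image = pipe("a photo of an astronaut riding a horse").images[0]
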
When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -265,7 +265,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index 002014681040..010600f50e13 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -137,7 +137,7 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -148,7 +148,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.depth_estimator]: if cpu_offloaded_model is not None: diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py index 837811baae64..b6b803af1af2 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py @@ -404,7 +404,7 @@ def disable_vae_tiling(self): self.vae.disable_tiling() # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -417,7 +417,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -430,7 +430,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -442,7 +442,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py index 640fd7f2d94b..94b931eeceac 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py @@ -123,7 +123,7 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -134,7 +134,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.image_encoder, self.vae, self.safety_checker]: if cpu_offloaded_model is not None: diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index f8874ba2cfae..0353bf690ac8 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -228,7 +228,7 @@ def __init__( self.register_to_config(requires_safety_checker=requires_safety_checker) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -241,7 +241,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -254,7 +254,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -266,7 +266,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index d958f0e3fb72..b80fec41a177 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -295,7 +295,7 @@ def __init__( self.register_to_config(requires_safety_checker=requires_safety_checker) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -308,7 +308,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -321,7 +321,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -333,7 +333,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 483f27ae3966..2f759166dad0 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -221,7 +221,7 @@ def __init__( self.register_to_config(requires_safety_checker=requires_safety_checker) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -234,7 +234,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -247,7 +247,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -259,7 +259,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py index 367e401d57f8..484a1ca5b94c 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py @@ -429,7 +429,7 @@ def __call__( return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -442,7 +442,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -455,7 +455,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -467,7 +467,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py index e2800342e578..49fd275917fe 100755 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py @@ -127,7 +127,7 @@ def set_scheduler(self, scheduler_type: str): self.sampler = getattr(sampling, scheduler_type) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -140,7 +140,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -153,7 +153,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -165,7 +165,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py index d67a7f894886..62c309fedf59 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py @@ -101,7 +101,7 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, resample="bicubic") - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -112,7 +112,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]: if cpu_offloaded_model is not None: diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py index 85f628ca8229..ffe84474f35a 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py @@ -193,7 +193,7 @@ def disable_vae_tiling(self): """ self.vae.disable_tiling() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -206,7 +206,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -219,7 +219,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -231,7 +231,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py index 1d30b9ee0347..fbd951d39107 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py @@ -183,7 +183,7 @@ def disable_vae_slicing(self): self.vae.disable_slicing() # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -196,7 +196,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py index e03687e89eb1..b608ad10e625 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py @@ -145,7 +145,7 @@ def disable_vae_slicing(self): self.vae.disable_slicing() # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -158,7 +158,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py index 2239e3853a8e..a01192339161 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py @@ -183,7 +183,7 @@ def disable_vae_tiling(self): self.vae.disable_tiling() # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -196,7 +196,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -209,7 +209,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -221,7 +221,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py index 75ac4f777756..293db5b02835 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py @@ -369,7 +369,7 @@ def __init__( self.register_to_config(requires_safety_checker=requires_safety_checker) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -382,7 +382,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -394,7 +394,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if self.safety_checker is not None: cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -406,7 +406,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") hook = None for cpu_offloaded_model in [self.vae, self.text_encoder, self.unet, self.vae]: diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py index ba1c0d2b9d49..a814f4f47b26 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py @@ -164,7 +164,7 @@ def disable_vae_slicing(self): self.vae.disable_slicing() # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -177,7 +177,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py index 48283bf31156..7739b5e8c259 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py @@ -140,7 +140,7 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, resample="bicubic") self.register_to_config(max_noise_level=max_noise_level) - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -151,13 +151,13 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]: if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -169,7 +169,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py index e36ebfbb70f1..f8210541cf3a 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py @@ -158,7 +158,7 @@ def disable_vae_slicing(self): """ self.vae.disable_slicing() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
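For reference, both methods delegate to accelerate: enable_sequential_cpu_offload wraps each component with cpu_offload, so parameters stay on the CPU and are streamed to the execution device only while a submodule's forward runs. A standalone sketch of that hook on a toy module (the Linear layer is a stand-in for unet/text_encoder/vae, and executing the forward pass needs the chosen accelerator to be available):

    import torch
    from accelerate import cpu_offload

    toy = torch.nn.Linear(4, 4)                # stand-in for a pipeline component
    device = torch.device("cuda:0")            # with this patch: torch.device(f"{gpu}:{gpu_id}")
    cpu_offload(toy, execution_device=device)  # attaches the offload hook; weights remain on CPU

    out = toy(torch.randn(2, 4))               # the hook moves inputs and weights to `device` for the call
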
When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -169,7 +169,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") # TODO: self.prior.post_process_latents and self.image_noiser.{scale,unscale} are not covered by the offload hooks, so they fails if added to the list models = [ @@ -182,7 +182,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -194,7 +194,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py index 0187c86b4239..c5af3a2b618e 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py @@ -160,7 +160,7 @@ def disable_vae_slicing(self): """ self.vae.disable_slicing() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only @@ -171,7 +171,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("Please install accelerate via `pip install accelerate`") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") # TODO: self.image_normalizer.{scale,unscale} are not covered by the offload hooks, so they fails if added to the list models = [ @@ -184,7 +184,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -196,7 +196,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index 142aac94b99d..b17d2ff88778 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -164,7 +164,7 @@ def disable_vae_tiling(self): """ self.vae.disable_tiling() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a @@ -177,7 +177,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) @@ -186,7 +186,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0): for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]: cpu_offload(cpu_offloaded_model, device) - def enable_model_cpu_offload(self, gpu_id=0): + def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` @@ -198,7 +198,7 @@ def enable_model_cpu_offload(self, gpu_id=0): else: raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"{gpu}:{gpu_id}") if self.device.type != "cpu": self.to("cpu", silence_dtype_warnings=True) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index f699e2331027..ce77c6c117a7 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -172,7 +172,7 @@ def disable_vae_tiling(self): """ self.vae.disable_tiling() - def enable_sequential_cpu_offload(self, gpu_id=0): + def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
         When called, unet, text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
@@ -185,7 +185,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         if self.device.type != "cpu":
             self.to("cpu", silence_dtype_warnings=True)
@@ -194,7 +194,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
             cpu_offload(cpu_offloaded_model, device)
 
-    def enable_model_cpu_offload(self, gpu_id=0):
+    def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
         to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
@@ -206,7 +206,7 @@ def enable_model_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         if self.device.type != "cpu":
             self.to("cpu", silence_dtype_warnings=True)
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
index ecc330b5f504..b5068564bd04 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
@@ -149,7 +149,7 @@ def disable_vae_tiling(self):
         """
         self.vae.disable_tiling()
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
         text_encoder, vae have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded
@@ -161,7 +161,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         if self.device.type != "cpu":
             self.to("cpu", silence_dtype_warnings=True)
@@ -170,7 +170,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
             cpu_offload(cpu_offloaded_model, device)
 
-    def enable_model_cpu_offload(self, gpu_id=0):
+    def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
         to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
@@ -182,7 +182,7 @@ def enable_model_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         if self.device.type != "cpu":
             self.to("cpu", silence_dtype_warnings=True)
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
index 7a4b73cd3c35..293e042be880 100644
--- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
+++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
@@ -211,7 +211,7 @@ def disable_vae_tiling(self):
         """
         self.vae.disable_tiling()
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
         text_encoder, vae have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded
@@ -223,7 +223,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         if self.device.type != "cpu":
             self.to("cpu", silence_dtype_warnings=True)
@@ -232,7 +232,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
             cpu_offload(cpu_offloaded_model, device)
 
-    def enable_model_cpu_offload(self, gpu_id=0):
+    def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
         to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
@@ -244,7 +244,7 @@ def enable_model_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         if self.device.type != "cpu":
             self.to("cpu", silence_dtype_warnings=True)
diff --git a/src/diffusers/pipelines/unclip/pipeline_unclip.py b/src/diffusers/pipelines/unclip/pipeline_unclip.py
index abbb48ce8f46..b17ceff93c7c 100644
--- a/src/diffusers/pipelines/unclip/pipeline_unclip.py
+++ b/src/diffusers/pipelines/unclip/pipeline_unclip.py
@@ -205,7 +205,7 @@ def _encode_prompt(
 
         return prompt_embeds, text_encoder_hidden_states, text_mask
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's
         models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only
@@ -216,7 +216,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("Please install accelerate via `pip install accelerate`")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         # TODO: self.prior.post_process_latents is not covered by the offload hooks, so it fails if added to the list
         models = [
diff --git a/src/diffusers/pipelines/unclip/pipeline_unclip_image_variation.py b/src/diffusers/pipelines/unclip/pipeline_unclip_image_variation.py
index 30d74cd36bb0..0d4c57847ce6 100644
--- a/src/diffusers/pipelines/unclip/pipeline_unclip_image_variation.py
+++ b/src/diffusers/pipelines/unclip/pipeline_unclip_image_variation.py
@@ -198,7 +198,7 @@ def _encode_image(self, image, device, num_images_per_prompt, image_embeddings:
 
         return image_embeddings
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's
         models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only
@@ -209,7 +209,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("Please install accelerate via `pip install accelerate`")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         models = [
             self.decoder,
diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
index e25da12414e7..1e2dac606ba4 100644
--- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
+++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
@@ -174,7 +174,7 @@ def __init__(
 
     # Modified from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload
     # Add self.image_encoder, self.text_decoder to cpu_offloaded_models list
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
         text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
@@ -187,7 +187,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         if self.device.type != "cpu":
             self.to("cpu", silence_dtype_warnings=True)
@@ -201,7 +201,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
 
     # Modified from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload
     # Add self.image_encoder, self.text_decoder to cpu_offloaded_models list
-    def enable_model_cpu_offload(self, gpu_id=0):
+    def enable_model_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
         to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
@@ -213,7 +213,7 @@ def enable_model_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         if self.device.type != "cpu":
             self.to("cpu", silence_dtype_warnings=True)
diff --git a/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py b/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py
index 1d2e61d86b90..f31cb9c26c2f 100644
--- a/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py
+++ b/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py
@@ -148,7 +148,7 @@ def _revert_dual_attention(self):
 
         self.image_unet.register_to_config(dual_cross_attention=False)
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
         text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
@@ -159,7 +159,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("Please install accelerate via `pip install accelerate`")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         for cpu_offloaded_model in [self.image_unet, self.text_unet, self.text_encoder, self.vae]:
             if cpu_offloaded_model is not None:
diff --git a/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py b/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py
index 4450846300fc..137a72a8fc6a 100644
--- a/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py
+++ b/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py
@@ -75,7 +75,7 @@ def __init__(
         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
         text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
@@ -86,7 +86,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("Please install accelerate via `pip install accelerate`")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         for cpu_offloaded_model in [self.image_unet, self.text_unet, self.text_encoder, self.vae]:
             if cpu_offloaded_model is not None:
diff --git a/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py b/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py
index 1fdb21f2b745..6dfb25ffc669 100644
--- a/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py
+++ b/src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py
@@ -99,7 +99,7 @@ def _swap_unet_attention_blocks(self):
     def remove_unused_weights(self):
         self.register_modules(text_unet=None)
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
         text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
@@ -110,7 +110,7 @@ def enable_sequential_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("Please install accelerate via `pip install accelerate`")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        device = torch.device(f"{gpu}:{gpu_id}")
 
         for cpu_offloaded_model in [self.image_unet, self.text_unet, self.text_encoder, self.vae]:
             if cpu_offloaded_model is not None:

From 9411a63b3394b77b580a4a699b80f73dde75fd0c Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 7 Jul 2023 13:18:25 +0300
Subject: [PATCH 2/2] Add gpu option to cpu offload

---
 examples/community/stable_diffusion_repaint.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/community/stable_diffusion_repaint.py b/examples/community/stable_diffusion_repaint.py
index c34fed237174..025f1a70ee98 100644
--- a/examples/community/stable_diffusion_repaint.py
+++ b/examples/community/stable_diffusion_repaint.py
@@ -276,7 +276,7 @@ def __init__(
         self.register_to_config(requires_safety_checker=requires_safety_checker)
 
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu="cuda", gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
         text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
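
Usage note (not part of the patch): a minimal sketch of how the new parameter could be exercised once the patch is applied. The checkpoint name and the "xpu" backend below are illustrative assumptions; the `gpu` string is simply interpolated into `torch.device(f"{gpu}:{gpu_id}")`, so any device type the local PyTorch build recognizes should work, and `gpu="cuda"` with `gpu_id=0` remains the default behavior.

import torch
from diffusers import StableDiffusionPipeline

# Hypothetical example: checkpoint and target backend are assumptions, not part of the patch.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)

# Submodule-level offload onto the first XPU device instead of the CUDA default.
pipe.enable_sequential_cpu_offload(gpu="xpu", gpu_id=0)

# Alternatively, whole-model offload (faster, slightly higher memory use):
# pipe.enable_model_cpu_offload(gpu="xpu", gpu_id=0)

image = pipe("an astronaut riding a horse").images[0]
image.save("astronaut.png")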