From 1e075a12bceb848a8c45451d0aa0eaa1cda9e257 Mon Sep 17 00:00:00 2001 From: Marcus Wong Date: Wed, 20 Dec 2023 13:31:46 +0800 Subject: [PATCH 1/5] fix RuntimeError: Input type (float) and bias type (c10::Half) should be the same --- .../stable_diffusion/pipeline_stable_diffusion.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index b05d0b17dd5a..708ba861a1dd 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -1031,9 +1031,10 @@ def __call__( callback(step_idx, t, latents) if not output_type == "latent": - image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[ - 0 - ] + with torch.cuda.amp.autocast(): + image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[ + 0 + ] image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) else: image = latents From 24438a33c1b3df57ae68d5519f375827ecf323ac Mon Sep 17 00:00:00 2001 From: Marcus Wong Date: Wed, 20 Dec 2023 13:53:34 +0800 Subject: [PATCH 2/5] format source code From cef7d3a369fdeb36a26d1820346f03dd06fb3204 Mon Sep 17 00:00:00 2001 From: Marcus Wong Date: Wed, 20 Dec 2023 14:04:29 +0800 Subject: [PATCH 3/5] format code --- .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 708ba861a1dd..f49cd7e2092f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -1032,9 +1032,9 @@ def __call__( if not output_type == "latent": with torch.cuda.amp.autocast(): - image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[ - 0 - ] + image = self.vae.decode( + latents / self.vae.config.scaling_factor, return_dict=False, generator=generator + )[0] image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) else: image = latents From f7b86c68c3f2774278ec2e7c0f5bf893ee553812 Mon Sep 17 00:00:00 2001 From: Marcus Wong Date: Thu, 21 Dec 2023 13:23:24 +0800 Subject: [PATCH 4/5] remove the autocast blocks within the pipeline --- .../stable_diffusion/pipeline_stable_diffusion.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index f49cd7e2092f..8dc512bd8385 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -1031,10 +1031,9 @@ def __call__( callback(step_idx, t, latents) if not output_type == "latent": - with torch.cuda.amp.autocast(): - image = self.vae.decode( - latents / self.vae.config.scaling_factor, return_dict=False, generator=generator - )[0] + image = self.vae.decode( + latents / self.vae.config.scaling_factor, return_dict=False, generator=generator + )[0] image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) else: image = latents From 9abdbe1050a7787880c995a56b639fb533842b50 Mon Sep 17 00:00:00 2001 From: Marcus Wong Date: Thu, 21 Dec 2023 13:30:29 +0800 Subject: [PATCH 5/5] add autocast blocks to pipeline caller in train_text_to_image_lora.py --- .../text_to_image/train_text_to_image_lora.py | 16 ++++++++++------ .../pipeline_stable_diffusion.py | 6 +++--- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index c8efbddd0b44..afbf80404872 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -844,10 +844,11 @@ def collate_fn(examples): if args.seed is not None: generator = generator.manual_seed(args.seed) images = [] - for _ in range(args.num_validation_images): - images.append( - pipeline(args.validation_prompt, num_inference_steps=30, generator=generator).images[0] - ) + with torch.cuda.amp.autocast(): + for _ in range(args.num_validation_images): + images.append( + pipeline(args.validation_prompt, num_inference_steps=30, generator=generator).images[0] + ) for tracker in accelerator.trackers: if tracker.name == "tensorboard": @@ -913,8 +914,11 @@ def collate_fn(examples): if args.seed is not None: generator = generator.manual_seed(args.seed) images = [] - for _ in range(args.num_validation_images): - images.append(pipeline(args.validation_prompt, num_inference_steps=30, generator=generator).images[0]) + with torch.cuda.amp.autocast(): + for _ in range(args.num_validation_images): + images.append( + pipeline(args.validation_prompt, num_inference_steps=30, generator=generator).images[0] + ) for tracker in accelerator.trackers: if len(images) != 0: diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 8dc512bd8385..b05d0b17dd5a 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -1031,9 +1031,9 @@ def __call__( callback(step_idx, t, latents) if not output_type == "latent": - image = self.vae.decode( - latents / self.vae.config.scaling_factor, return_dict=False, generator=generator - )[0] + image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[ + 0 + ] image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) else: image = latents