From 4f7ad4154cb4364a6c97fd6cf976b5b0704de93e Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 7 Nov 2022 10:58:48 +0100 Subject: [PATCH 1/7] Schedulers: don't use float64 on mps --- .../schedulers/scheduling_euler_ancestral_discrete.py | 6 +++++- src/diffusers/schedulers/scheduling_euler_discrete.py | 6 +++++- src/diffusers/schedulers/scheduling_lms_discrete.py | 7 ++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py index fe45b3d591f5..e4da82da5118 100644 --- a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py @@ -150,7 +150,11 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) self.sigmas = torch.from_numpy(sigmas).to(device=device) - self.timesteps = torch.from_numpy(timesteps).to(device=device) + if str(device).startswith("mps"): + # mps does not support float64 + self.timesteps = torch.from_numpy(timesteps).to(device, dtype=torch.float32) + else: + self.timesteps = torch.from_numpy(timesteps).to(device=device) def step( self, diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py index 0cb31a451272..0f7eff66f140 100644 --- a/src/diffusers/schedulers/scheduling_euler_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_discrete.py @@ -151,7 +151,11 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) self.sigmas = torch.from_numpy(sigmas).to(device=device) - self.timesteps = torch.from_numpy(timesteps).to(device=device) + if str(device).startswith("mps"): + # mps does not support float64 + self.timesteps = torch.from_numpy(timesteps).to(device, dtype=torch.float32) + else: + self.timesteps = torch.from_numpy(timesteps).to(device=device) def step( self, diff --git a/src/diffusers/schedulers/scheduling_lms_discrete.py b/src/diffusers/schedulers/scheduling_lms_discrete.py index 8d633267c607..e961dccc69c7 100644 --- a/src/diffusers/schedulers/scheduling_lms_discrete.py +++ b/src/diffusers/schedulers/scheduling_lms_discrete.py @@ -172,8 +172,13 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) + self.sigmas = torch.from_numpy(sigmas).to(device=device) - self.timesteps = torch.from_numpy(timesteps).to(device=device) + if str(device).startswith("mps"): + # mps does not support float64 + self.timesteps = torch.from_numpy(timesteps).to(device, dtype=torch.float32) + else: + self.timesteps = torch.from_numpy(timesteps).to(device=device) self.derivatives = [] From 2ef622d034458b9486a676a9699a67e38b965408 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 7 Nov 2022 11:01:33 +0100 Subject: [PATCH 2/7] Test set_timesteps() on device (float schedulers). --- tests/test_scheduler.py | 87 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 29186aaac99b..153844b2f2df 100755 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -82,8 +82,8 @@ def check_over_configs(self, time_step=0, **config): num_inference_steps = kwargs.pop("num_inference_steps", None) - # TODO(Suraj) - delete the following two lines once DDPM, DDIM, and PNDM have timesteps casted to float by default for scheduler_class in self.scheduler_classes: + # TODO(Suraj) - delete the following two lines once DDPM, DDIM, and PNDM have timesteps casted to float by default if scheduler_class in (EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, LMSDiscreteScheduler): time_step = float(time_step) @@ -1010,6 +1010,31 @@ def test_full_loop_no_noise(self): assert abs(result_sum.item() - 1006.388) < 1e-2 assert abs(result_mean.item() - 1.31) < 1e-3 + def test_full_loop_device(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 1006.388) < 1e-2 + assert abs(result_mean.item() - 1.31) < 1e-3 + class EulerDiscreteSchedulerTest(SchedulerCommonTest): scheduler_classes = (EulerDiscreteScheduler,) @@ -1066,6 +1091,34 @@ def test_full_loop_no_noise(self): assert abs(result_sum.item() - 10.0807) < 1e-2 assert abs(result_mean.item() - 0.0131) < 1e-3 + def test_full_loop_device(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + generator = torch.Generator().manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + print(result_sum, result_mean) + + assert abs(result_sum.item() - 10.0807) < 1e-2 + assert abs(result_mean.item() - 0.0131) < 1e-3 + class EulerAncestralDiscreteSchedulerTest(SchedulerCommonTest): scheduler_classes = (EulerAncestralDiscreteScheduler,) @@ -1121,6 +1174,38 @@ def test_full_loop_no_noise(self): assert abs(result_sum.item() - 152.3192) < 1e-2 assert abs(result_mean.item() - 0.1983) < 1e-3 + def test_full_loop_device(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + generator = torch.Generator().manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + print(result_sum, result_mean) + if not str(torch_device).startswith("mps"): + # The following sum varies between 148 and 156 on mps. Why? + assert abs(result_sum.item() - 152.3192) < 1e-2 + assert abs(result_mean.item() - 0.1983) < 1e-3 + else: + # Larger tolerance on mps + assert abs(result_mean.item() - 0.1983) < 1e-2 + class IPNDMSchedulerTest(SchedulerCommonTest): scheduler_classes = (IPNDMScheduler,) From eb52838e357b566ead8b314c2dab449b3b79f457 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 7 Nov 2022 11:02:15 +0100 Subject: [PATCH 3/7] SD pipeline: use device in set_timesteps. --- .../stable_diffusion/pipeline_stable_diffusion.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 1ccc87804e68..a4f224a0c267 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -354,12 +354,9 @@ def __call__( raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") latents = latents.to(self.device) - # set timesteps - self.scheduler.set_timesteps(num_inference_steps) - - # Some schedulers like PNDM have timesteps as arrays - # It's more optimized to move all timesteps to correct device beforehand - timesteps_tensor = self.scheduler.timesteps.to(self.device) + # set timesteps and move to the correct device + self.scheduler.set_timesteps(num_inference_steps, device=self.device) + timesteps_tensor = self.scheduler.timesteps # scale the initial noise by the standard deviation required by the scheduler latents = latents * self.scheduler.init_noise_sigma From e246b67633902bbdd1b79b9573b3e5f95b29e8ab Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 7 Nov 2022 11:09:15 +0100 Subject: [PATCH 4/7] SD in-painting pipeline: use device in set_timesteps. --- .../pipeline_stable_diffusion_inpaint.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 34e8231c63ee..251b37627a39 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -416,12 +416,9 @@ def __call__( " `pipeline.unet` or your `mask_image` or `image` input." ) - # set timesteps - self.scheduler.set_timesteps(num_inference_steps) - - # Some schedulers like PNDM have timesteps as arrays - # It's more optimized to move all timesteps to correct device beforehand - timesteps_tensor = self.scheduler.timesteps.to(self.device) + # set timesteps and move to the correct device + self.scheduler.set_timesteps(num_inference_steps, device=self.device) + timesteps_tensor = self.scheduler.timesteps # scale the initial noise by the standard deviation required by the scheduler latents = latents * self.scheduler.init_noise_sigma From c44f1b9ac95b561253d18482a90f2a4a8c4c1cde Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 7 Nov 2022 11:33:58 +0100 Subject: [PATCH 5/7] Tests: fix mps crashes. --- tests/models/test_models_unet_2d.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/models/test_models_unet_2d.py b/tests/models/test_models_unet_2d.py index 71ddf1a13414..81437311c658 100644 --- a/tests/models/test_models_unet_2d.py +++ b/tests/models/test_models_unet_2d.py @@ -456,6 +456,7 @@ def get_encoder_hidden_states(self, seed=0, shape=(4, 77, 768), fp16=False): # fmt: on ] ) + @require_torch_gpu def test_compvis_sd_v1_4(self, seed, timestep, expected_slice): model = self.get_unet_model(model_id="CompVis/stable-diffusion-v1-4") latents = self.get_latents(seed) @@ -507,6 +508,7 @@ def test_compvis_sd_v1_4_fp16(self, seed, timestep, expected_slice): # fmt: on ] ) + @require_torch_gpu def test_compvis_sd_v1_5(self, seed, timestep, expected_slice): model = self.get_unet_model(model_id="runwayml/stable-diffusion-v1-5") latents = self.get_latents(seed) @@ -558,6 +560,7 @@ def test_compvis_sd_v1_5_fp16(self, seed, timestep, expected_slice): # fmt: on ] ) + @require_torch_gpu def test_compvis_sd_inpaint(self, seed, timestep, expected_slice): model = self.get_unet_model(model_id="runwayml/stable-diffusion-inpainting") latents = self.get_latents(seed, shape=(4, 9, 64, 64)) From e808fd167790c72a71374fa4c9698b65f64b089b Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 7 Nov 2022 12:43:47 +0100 Subject: [PATCH 6/7] Skip test_load_pipeline_from_git on mps. Not compatible with float16. --- tests/test_pipelines.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index b8316075fa93..2b19b08b3742 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -41,7 +41,7 @@ from diffusers.pipeline_utils import DiffusionPipeline from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, WEIGHTS_NAME, floats_tensor, slow, torch_device -from diffusers.utils.testing_utils import CaptureLogger, get_tests_dir +from diffusers.utils.testing_utils import CaptureLogger, get_tests_dir, require_torch_gpu from parameterized import parameterized from PIL import Image from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -124,7 +124,7 @@ def test_local_custom_pipeline(self): assert output_str == "This is a local test" @slow - @unittest.skipIf(torch_device == "cpu", "Stable diffusion is supposed to run on GPU") + @require_torch_gpu def test_load_pipeline_from_git(self): clip_model_id = "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" From a218dc93c179f092f39691ad6ae42f96e75bdf3c Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Tue, 8 Nov 2022 13:01:17 +0100 Subject: [PATCH 7/7] Use device.type instead of str in Euler schedulers. --- .../schedulers/scheduling_euler_ancestral_discrete.py | 4 ++-- src/diffusers/schedulers/scheduling_euler_discrete.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py index 704f25f3e6de..33505c81c004 100644 --- a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py @@ -221,8 +221,8 @@ def step( prev_sample = sample + derivative * dt - device = model_output.device if torch.is_tensor(model_output) else "cpu" - if str(device) == "mps": + device = model_output.device if torch.is_tensor(model_output) else torch.device("cpu") + if device.type == "mps": # randn does not work reproducibly on mps noise = torch.randn(model_output.shape, dtype=model_output.dtype, device="cpu", generator=generator).to( device diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py index ba1ced7caddf..9f707c27a15b 100644 --- a/src/diffusers/schedulers/scheduling_euler_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_discrete.py @@ -218,8 +218,8 @@ def step( gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0 - device = model_output.device if torch.is_tensor(model_output) else "cpu" - if str(device) == "mps": + device = model_output.device if torch.is_tensor(model_output) else torch.device("cpu") + if device.type == "mps": # randn does not work reproducibly on mps noise = torch.randn(model_output.shape, dtype=model_output.dtype, device="cpu", generator=generator).to( device