From caec7956462f036dcc5840190cb210654c4a6f9f Mon Sep 17 00:00:00 2001 From: Vladimir Sotnikov Date: Tue, 6 Dec 2022 16:09:02 +0200 Subject: [PATCH 1/9] [Stable Diffusion Img2Img] resize source images to integer multiple of 8 instead of 32 --- .../stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index 2242d21b1d91..7a1a6eed69d2 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -34,7 +34,7 @@ def preprocess(image): w, h = image.size - w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 + w, h = map(lambda x: x - x % 8, (w, h)) # resize to integer multiple of 8 image = image.resize((w, h), resample=PIL_INTERPOLATION["lanczos"]) image = np.array(image).astype(np.float32) / 255.0 image = image[None].transpose(0, 3, 1, 2) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 2879fd275543..15572e89e00d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -44,7 +44,7 @@ def preprocess(image): w, h = image.size - w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 + w, h = map(lambda x: x - x % 8, (w, h)) # resize to integer multiple of 8 image = image.resize((w, h), resample=PIL_INTERPOLATION["lanczos"]) image = np.array(image).astype(np.float32) / 255.0 image = image[None].transpose(0, 3, 1, 2) From 875ee0c3f34b3744c74701b1d4ebaec3cb00fad8 
Mon Sep 17 00:00:00 2001 From: Vladimir Sotnikov Date: Tue, 6 Dec 2022 16:59:00 +0200 Subject: [PATCH 2/9] [Alt Diffusion Img2Img] resize source images to multiple of 8 instead of 32 --- .../pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index 28aea4acc2b0..76a8474ba6c5 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -45,7 +45,7 @@ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess def preprocess(image): w, h = image.size - w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 + w, h = map(lambda x: x - x % 8, (w, h)) # resize to integer multiple of 8 image = image.resize((w, h), resample=PIL_INTERPOLATION["lanczos"]) image = np.array(image).astype(np.float32) / 255.0 image = image[None].transpose(0, 3, 1, 2) From 0dc8d0acfcfc216d5c6289ae6540da19bcf8cf14 Mon Sep 17 00:00:00 2001 From: Vladimir Sotnikov Date: Thu, 12 Jan 2023 22:40:50 +0200 Subject: [PATCH 3/9] [Img2Img] fix AltDiffusion Img2Img resolution test --- .../test_alt_diffusion_img2img.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py b/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py index 761b2c013401..27d8899e1221 100644 --- a/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py +++ b/tests/pipelines/altdiffusion/test_alt_diffusion_img2img.py @@ -207,6 +207,43 @@ def test_stable_diffusion_img2img_fp16(self): assert image.shape == (1, 32, 32, 3) + @unittest.skipIf(torch_device != "cuda", "This test requires a GPU") + def test_stable_diffusion_img2img_pipeline_multiple_of_8(self): + init_image = load_image( 
+ "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/img2img/sketch-mountains-input.jpg" + ) + # resize to resolution that is divisible by 8 but not 16 or 32 + init_image = init_image.resize((760, 504)) + + model_id = "BAAI/AltDiffusion" + pipe = AltDiffusionImg2ImgPipeline.from_pretrained( + model_id, + safety_checker=None, + ) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + prompt = "A fantasy landscape, trending on artstation" + + generator = torch.Generator(device=torch_device).manual_seed(0) + output = pipe( + prompt=prompt, + image=init_image, + strength=0.75, + guidance_scale=7.5, + generator=generator, + output_type="np", + ) + image = output.images[0] + + image_slice = image[255:258, 383:386, -1] + + assert image.shape == (504, 760, 3) + expected_slice = np.array([0.3252, 0.3340, 0.3418, 0.3263, 0.3346, 0.3300, 0.3163, 0.3470, 0.3427]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + @slow @require_torch_gpu From 0cc4792384527a4710ecd6993ae1542ffc0338e7 Mon Sep 17 00:00:00 2001 From: Vladimir Sotnikov Date: Thu, 12 Jan 2023 23:38:53 +0200 Subject: [PATCH 4/9] [Img2Img] add Stable Diffusion Img2Img resolution test --- .../test_stable_diffusion_img2img.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index fe6dc729e63e..7877d9ec23f3 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -333,6 +333,42 @@ def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): # make sure that less than 2.2 GB is allocated assert mem_bytes < 2.2 * 10**9 + def test_stable_diffusion_img2img_pipeline_multiple_of_8(self): + init_image = load_image( + 
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/img2img/sketch-mountains-input.jpg" + ) + # resize to resolution that is divisible by 8 but not 16 or 32 + init_image = init_image.resize((760, 504)) + + model_id = "CompVis/stable-diffusion-v1-4" + pipe = StableDiffusionImg2ImgPipeline.from_pretrained( + model_id, + safety_checker=None, + ) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + prompt = "A fantasy landscape, trending on artstation" + + generator = torch.Generator(device=torch_device).manual_seed(0) + output = pipe( + prompt=prompt, + image=init_image, + strength=0.75, + guidance_scale=7.5, + generator=generator, + output_type="np", + ) + image = output.images[0] + + image_slice = image[255:258, 383:386, -1] + + assert image.shape == (504, 760, 3) + expected_slice = np.array([0.7124, 0.7105, 0.6993, 0.7140, 0.7106, 0.6945, 0.7198, 0.7172, 0.7031]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + @nightly @require_torch_gpu From a90b7e7138844cf91a52bed2b54925cf1bf77747 Mon Sep 17 00:00:00 2001 From: Vladimir Sotnikov Date: Thu, 12 Jan 2023 23:48:44 +0200 Subject: [PATCH 5/9] [Cycle Diffusion] round resolution to multiplies of 8 instead of 32 --- .../pipelines/stable_diffusion/pipeline_cycle_diffusion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index d47a6783a5b1..23df52d818f3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -44,7 +44,7 @@ def preprocess(image): if isinstance(image[0], PIL.Image.Image): w, h = image[0].size - w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 + w, h = map(lambda x: x - x % 8, (w, h)) # resize to integer multiple 
of 8 image = [np.array(i.resize((w, h), resample=PIL_INTERPOLATION["lanczos"]))[None, :] for i in image] image = np.concatenate(image, axis=0) From 50cbde7e7771b380f75b24fdfce3e76875b51d8e Mon Sep 17 00:00:00 2001 From: Vladimir Sotnikov Date: Thu, 12 Jan 2023 23:49:29 +0200 Subject: [PATCH 6/9] [ONNX SD Img2Img] round resolution to multiples of 64 instead of 32 --- .../stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index 0a00f52f8c8a..e55a7e5c2e76 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -41,7 +41,7 @@ def preprocess(image): if isinstance(image[0], PIL.Image.Image): w, h = image[0].size - w, h = map(lambda x: x - x % 8, (w, h)) # resize to integer multiple of 8 + w, h = map(lambda x: x - x % 64, (w, h)) # resize to integer multiple of 64 image = [np.array(i.resize((w, h), resample=PIL_INTERPOLATION["lanczos"]))[None, :] for i in image] image = np.concatenate(image, axis=0) From 5c5fe9655d0b9077578c2e09005bfe94c7628696 Mon Sep 17 00:00:00 2001 From: Vladimir Sotnikov Date: Thu, 12 Jan 2023 23:54:58 +0200 Subject: [PATCH 7/9] [SD Depth2Img] round resolution to multiples of 8 instead of 32 --- .../stable_diffusion/pipeline_stable_diffusion_depth2img.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index b8123ecfe05d..7e876f49c68f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ 
b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -49,7 +49,7 @@ def preprocess(image): if isinstance(image[0], PIL.Image.Image): w, h = image[0].size - w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 + w, h = map(lambda x: x - x % 8, (w, h)) # resize to integer multiple of 8 image = [np.array(i.resize((w, h), resample=PIL_INTERPOLATION["lanczos"]))[None, :] for i in image] image = np.concatenate(image, axis=0) From 7a44f660f30f64b39fd51ec0455f5e98aeec09cf Mon Sep 17 00:00:00 2001 From: Vladimir Sotnikov Date: Thu, 12 Jan 2023 23:55:06 +0200 Subject: [PATCH 8/9] [Repaint] round resolution to multiples of 8 instead of 32 --- src/diffusers/pipelines/repaint/pipeline_repaint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/repaint/pipeline_repaint.py b/src/diffusers/pipelines/repaint/pipeline_repaint.py index b93b02aba4d9..a3298e72399a 100644 --- a/src/diffusers/pipelines/repaint/pipeline_repaint.py +++ b/src/diffusers/pipelines/repaint/pipeline_repaint.py @@ -38,7 +38,7 @@ def _preprocess_image(image: Union[List, PIL.Image.Image, torch.Tensor]): if isinstance(image[0], PIL.Image.Image): w, h = image[0].size - w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 + w, h = map(lambda x: x - x % 8, (w, h)) # resize to integer multiple of 8 image = [np.array(i.resize((w, h), resample=PIL_INTERPOLATION["lanczos"]))[None, :] for i in image] image = np.concatenate(image, axis=0) From c42de0f1908b0305969260f2d049db7e9a383d3f Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Fri, 13 Jan 2023 14:46:39 +0000 Subject: [PATCH 9/9] fix make style --- .../stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index 
e55a7e5c2e76..19f41f5e7bd7 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -32,7 +32,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess +# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess with 8->64 def preprocess(image): if isinstance(image, torch.Tensor): return image