From 2657ee7b39041b1bf22687deef7ecf6f5e2c6ba4 Mon Sep 17 00:00:00 2001
From: Hanusz Leszek <leszek.hanusz@gmail.com>
Date: Sun, 11 Sep 2022 17:48:54 +0200
Subject: [PATCH 1/3] add noise to initial latents depending on mask and
 strength

---
 .../pipeline_stable_diffusion_inpaint.py       | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
index 05ea84ae0326..78f0658f35b5 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -201,6 +201,10 @@ def __call__(
 
         self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs)
 
+        # preprocess mask
+        mask = preprocess_mask(mask_image).to(self.device)
+        mask = torch.cat([mask] * batch_size)
+
         # preprocess image
         init_image = preprocess_image(init_image).to(self.device)
 
@@ -208,16 +212,22 @@ def __call__(
         init_latent_dist = self.vae.encode(init_image.to(self.device)).latent_dist
         init_latents = init_latent_dist.sample(generator=generator)
 
+        # mix random noise into the latents, weighted by (1 - mask), then blend with the originals by strength
+        rand_latents = torch.randn(
+            init_latents.shape,
+            generator=generator,
+            device=self.device,
+        )
+        init_latents_noised = init_latents * mask + rand_latents * (1 - mask)
+        init_latents = init_latents * (1 - strength) + init_latents_noised * strength
+
+        # multiply by scale_factor
         init_latents = 0.18215 * init_latents
 
         # Expand init_latents for batch_size
         init_latents = torch.cat([init_latents] * batch_size)
         init_latents_orig = init_latents
 
-        # preprocess mask
-        mask = preprocess_mask(mask_image).to(self.device)
-        mask = torch.cat([mask] * batch_size)
-
         # check sizes
         if not mask.shape == init_latents.shape:
             raise ValueError("The mask and init_image should be the same size!")

From 15d9c18b952d78d92b1dd4d68dc7da6d414a1deb Mon Sep 17 00:00:00 2001
From: Hanusz Leszek <leszek.hanusz@gmail.com>
Date: Sun, 11 Sep 2022 18:24:08 +0200
Subject: [PATCH 2/3] Fix inpaint test values

---
 tests/test_pipelines.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index a2f63705b035..9e071cfa63ba 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -694,7 +694,9 @@ def test_stable_diffusion_inpaint(self):
         image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]
 
         assert image.shape == (1, 32, 32, 3)
-        expected_slice = np.array([0.4731, 0.5346, 0.4531, 0.6251, 0.5446, 0.4057, 0.5527, 0.5896, 0.5153])
+        expected_slice = np.array(
+            [0.4893303, 0.5381786, 0.46649122, 0.62859786, 0.53987336, 0.39735478, 0.5483682, 0.59601367, 0.5178648]
+        )
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
         assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
 

From f36b588901934a7b3c10bbe560806b8612996e41 Mon Sep 17 00:00:00 2001
From: Hanusz Leszek <leszek.hanusz@gmail.com>
Date: Sun, 18 Sep 2022 11:57:41 +0200
Subject: [PATCH 3/3] fix merge: build the batched mask from mask_image

---
 .../stable_diffusion/pipeline_stable_diffusion_inpaint.py       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
index 374de34fa4cc..41f426532754 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -216,7 +216,7 @@ def __call__(
         if not isinstance(mask_image, torch.FloatTensor):
             mask_image = preprocess_mask(mask_image)
         mask_image = mask_image.to(self.device)
-        mask = torch.cat([mask] * batch_size)
+        mask = torch.cat([mask_image] * batch_size)
 
         # preprocess image
         if not isinstance(init_image, torch.FloatTensor):