From b5e067f51d90d5597a12db18c029d4c1aed7b963 Mon Sep 17 00:00:00 2001
From: Steven Liu
Date: Wed, 17 May 2023 14:44:54 -0700
Subject: [PATCH 1/2] add textual inversion inference to docs

---
 docs/source/en/_toctree.yml          |  2 +
 .../textual_inversion_inference      | 82 +++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 docs/source/en/using-diffusers/textual_inversion_inference

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 246b467d8b04..77cb2f59be8c 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -44,6 +44,8 @@
     title: Text-guided image-inpainting
   - local: using-diffusers/depth2img
     title: Text-guided depth-to-image
+  - local: using-diffusers/textual_inversion_inference
+    title: Textual inversion
   - local: using-diffusers/reusing_seeds
     title: Improve image quality with deterministic generation
   - local: using-diffusers/reproducibility
diff --git a/docs/source/en/using-diffusers/textual_inversion_inference b/docs/source/en/using-diffusers/textual_inversion_inference
new file mode 100644
index 000000000000..4cf237ad8d6e
--- /dev/null
+++ b/docs/source/en/using-diffusers/textual_inversion_inference
@@ -0,0 +1,82 @@
# Textual inversion

[[open-in-colab]]

The [`StableDiffusionPipeline`] supports textual inversion, a technique that enables a model like Stable Diffusion to learn a new concept from just a few sample images. This gives you more control over the generated images and allows you to tailor the model towards specific concepts. You can get started quickly with a collection of community created concepts in the [Stable Diffusion Conceptualizer](https://huggingface.co/spaces/sd-concepts-library/stable-diffusion-conceptualizer).

This guide will show you how to run inference with textual inversion using a pre-learned concept from the Stable Diffusion Conceptualizer. If you're interested in teaching a model new concepts with textual inversion, take a look at the [Textual Inversion](./training/text_inversion) training guide.

Log in to your Hugging Face account:

```py
from huggingface_hub import notebook_login

notebook_login()
```

Import the necessary libraries and create a helper function to visualize the generated images:

```py
import os
import torch

import PIL
from PIL import Image

from diffusers import StableDiffusionPipeline
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer

def image_grid(imgs, rows, cols):
    assert len(imgs) == rows*cols

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols*w, rows*h))
    grid_w, grid_h = grid.size

    for i, img in enumerate(imgs):
        grid.paste(img, box=(i%cols*w, i//cols*h))
    return grid
```

Pick a Stable Diffusion checkpoint and a pre-learned concept from the [Stable Diffusion Conceptualizer](https://huggingface.co/spaces/sd-concepts-library/stable-diffusion-conceptualizer):

```py
pretrained_model_name_or_path = "runwayml/stable-diffusion-v1-5"
repo_id_embeds = "sd-concepts-library/cat-toy"
```

Now you can load a pipeline, and pass the pre-learned concept to it:

```py
pipeline = StableDiffusionPipeline.from_pretrained(
    pretrained_model_name_or_path,
    torch_dtype=torch.float16
  ).to("cuda")

pipeline.load_textual_inversion(repo_id_embeds)
```

Create a prompt with the pre-learned concept by using the special placeholder token `<cat-toy>`, and choose the number of samples and rows of images you'd like to generate:

```py
prompt = "a graffiti on a favela wall with a <cat-toy> on it"

num_samples = 2
num_rows = 2
```

Then run the pipeline (feel free to adjust the parameters like `num_inference_steps` and `guidance_scale` to see how they affect image quality), save the generated images and visualize them with the helper function you created at the beginning:

```py
all_images = []
for _ in range(num_rows):
    images = pipeline(prompt, num_images_per_prompt=num_samples, num_inference_steps=50, guidance_scale=7.5).images
    all_images.extend(images)

grid = image_grid(all_images, num_rows, num_samples)
grid
```
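
The previous step mentions saving the generated images, but the snippet only displays the grid inline in a notebook. A minimal sketch for also writing the results to disk could look like this (the file names are just examples):

```py
# Save the assembled grid and each generated image individually (example file names)
grid.save("cat_toy_grid.png")

for i, image in enumerate(all_images):
    image.save(f"cat_toy_{i}.png")
```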
+ +
From ee9eb9a19f3d7c1da435a14f2ed2f3f71194c961 Mon Sep 17 00:00:00 2001
From: Steven Liu
Date: Wed, 17 May 2023 14:56:53 -0700
Subject: [PATCH 2/2] add to toctree

---
 ...ference => textual_inversion_inference.mdx} | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)
 rename docs/source/en/using-diffusers/{textual_inversion_inference => textual_inversion_inference.mdx} (90%)

diff --git a/docs/source/en/using-diffusers/textual_inversion_inference b/docs/source/en/using-diffusers/textual_inversion_inference.mdx
similarity index 90%
rename from docs/source/en/using-diffusers/textual_inversion_inference
rename to docs/source/en/using-diffusers/textual_inversion_inference.mdx
index 4cf237ad8d6e..9eca3e7e465c 100644
--- a/docs/source/en/using-diffusers/textual_inversion_inference
+++ b/docs/source/en/using-diffusers/textual_inversion_inference.mdx
@@ -26,15 +26,16 @@ from PIL import Image
 
 from diffusers import StableDiffusionPipeline
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
+
 def image_grid(imgs, rows, cols):
-    assert len(imgs) == rows*cols
+    assert len(imgs) == rows * cols
 
     w, h = imgs[0].size
-    grid = Image.new('RGB', size=(cols*w, rows*h))
+    grid = Image.new("RGB", size=(cols * w, rows * h))
     grid_w, grid_h = grid.size
-    
+
     for i, img in enumerate(imgs):
-        grid.paste(img, box=(i%cols*w, i//cols*h))
+        grid.paste(img, box=(i % cols * w, i // cols * h))
     return grid
 ```
 
@@ -48,10 +49,7 @@ repo_id_embeds = "sd-concepts-library/cat-toy"
 
 Now you can load a pipeline, and pass the pre-learned concept to it:
 
 ```py
-pipeline = StableDiffusionPipeline.from_pretrained(
-    pretrained_model_name_or_path,
-    torch_dtype=torch.float16
-  ).to("cuda")
+pipeline = StableDiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16).to("cuda")
 
 pipeline.load_textual_inversion(repo_id_embeds)
 ```
@@ -68,7 +66,7 @@ num_rows = 2
 ```
 
 Then run the pipeline (feel free to adjust the parameters like `num_inference_steps` and `guidance_scale` to see how they affect image quality), save the generated images and visualize them with the helper function you created at the beginning:
 
 ```py
-all_images = [] 
+all_images = []
 for _ in range(num_rows):
     images = pipeline(prompt, num_images_per_prompt=num_samples, num_inference_steps=50, guidance_scale=7.5).images
     all_images.extend(images)
 
 grid = image_grid(all_images, num_rows, num_samples)
 grid
 ```
- +