From b5e067f51d90d5597a12db18c029d4c1aed7b963 Mon Sep 17 00:00:00 2001
From: Steven Liu
Date: Wed, 17 May 2023 14:44:54 -0700
Subject: [PATCH 1/2] add textual inversion inference to docs

---
 docs/source/en/_toctree.yml          |  2 +
 .../textual_inversion_inference      | 82 +++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 docs/source/en/using-diffusers/textual_inversion_inference

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 246b467d8b04..77cb2f59be8c 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -44,6 +44,8 @@
     title: Text-guided image-inpainting
   - local: using-diffusers/depth2img
     title: Text-guided depth-to-image
+  - local: using-diffusers/textual_inversion_inference
+    title: Textual inversion
   - local: using-diffusers/reusing_seeds
     title: Improve image quality with deterministic generation
   - local: using-diffusers/reproducibility
diff --git a/docs/source/en/using-diffusers/textual_inversion_inference b/docs/source/en/using-diffusers/textual_inversion_inference
new file mode 100644
index 000000000000..4cf237ad8d6e
--- /dev/null
+++ b/docs/source/en/using-diffusers/textual_inversion_inference
@@ -0,0 +1,82 @@
# Textual inversion

[[open-in-colab]]

The [`StableDiffusionPipeline`] supports textual inversion, a technique that enables a model like Stable Diffusion to learn a new concept from just a few sample images. This gives you more control over the generated images and allows you to tailor the model towards specific concepts. You can get started quickly with a collection of community created concepts in the [Stable Diffusion Conceptualizer](https://huggingface.co/spaces/sd-concepts-library/stable-diffusion-conceptualizer).

This guide will show you how to run inference with textual inversion using a pre-learned concept from the Stable Diffusion Conceptualizer. If you're interested in teaching a model new concepts with textual inversion, take a look at the [Textual Inversion](./training/text_inversion) training guide.

Log in to your Hugging Face account:

```py
from huggingface_hub import notebook_login

notebook_login()
```

Import the necessary libraries and create a helper function to visualize the generated images:

```py
import os
import torch

import PIL
from PIL import Image

from diffusers import StableDiffusionPipeline
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer

def image_grid(imgs, rows, cols):
    assert len(imgs) == rows*cols

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols*w, rows*h))
    grid_w, grid_h = grid.size

    for i, img in enumerate(imgs):
        grid.paste(img, box=(i%cols*w, i//cols*h))
    return grid
```

Pick a Stable Diffusion checkpoint and a pre-learned concept from the [Stable Diffusion Conceptualizer](https://huggingface.co/spaces/sd-concepts-library/stable-diffusion-conceptualizer):

```py
pretrained_model_name_or_path = "runwayml/stable-diffusion-v1-5"
repo_id_embeds = "sd-concepts-library/cat-toy"
```

Now you can load a pipeline, and pass the pre-learned concept to it:

```py
pipeline = StableDiffusionPipeline.from_pretrained(
    pretrained_model_name_or_path,
    torch_dtype=torch.float16
  ).to("cuda")

pipeline.load_textual_inversion(repo_id_embeds)
```

Create a prompt with the pre-learned concept by using the special placeholder token `<cat-toy>`, and choose the number of samples and rows of images you'd like to generate:

```py
prompt = "a graffiti on a favela wall with a <cat-toy> on it"

num_samples = 2
num_rows = 2
```

Then run the pipeline (feel free to adjust the parameters like `num_inference_steps` and `guidance_scale` to see how they affect image quality), save the generated images and visualize them with the helper function you created at the beginning:

```py
all_images = []
for _ in range(num_rows):
    images = pipeline(prompt, num_images_per_prompt=num_samples, num_inference_steps=50, guidance_scale=7.5).images
    all_images.extend(images)

grid = image_grid(all_images, num_rows, num_samples)
grid
```
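
The previous step mentions saving the generated images, but the snippet only displays the grid inline in a notebook. A minimal sketch for also writing the results to disk could look like this (the file names are just examples):

```py
# Save the assembled grid and each generated image individually (example file names)
grid.save("cat_toy_grid.png")

for i, image in enumerate(all_images):
    image.save(f"cat_toy_{i}.png")
```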
+ +
From ee9eb9a19f3d7c1da435a14f2ed2f3f71194c961 Mon Sep 17 00:00:00 2001
From: Steven Liu
Date: Wed, 17 May 2023 14:56:53 -0700
Subject: [PATCH 2/2] add to toctree

---
 ...ference => textual_inversion_inference.mdx} | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)
 rename docs/source/en/using-diffusers/{textual_inversion_inference => textual_inversion_inference.mdx} (90%)

diff --git a/docs/source/en/using-diffusers/textual_inversion_inference b/docs/source/en/using-diffusers/textual_inversion_inference.mdx
similarity index 90%
rename from docs/source/en/using-diffusers/textual_inversion_inference
rename to docs/source/en/using-diffusers/textual_inversion_inference.mdx
index 4cf237ad8d6e..9eca3e7e465c 100644
--- a/docs/source/en/using-diffusers/textual_inversion_inference
+++ b/docs/source/en/using-diffusers/textual_inversion_inference.mdx
@@ -26,15 +26,16 @@ from PIL import Image
 
 from diffusers import StableDiffusionPipeline
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
+
 def image_grid(imgs, rows, cols):
-    assert len(imgs) == rows*cols
+    assert len(imgs) == rows * cols
 
     w, h = imgs[0].size
-    grid = Image.new('RGB', size=(cols*w, rows*h))
+    grid = Image.new("RGB", size=(cols * w, rows * h))
     grid_w, grid_h = grid.size
-    
+
     for i, img in enumerate(imgs):
-        grid.paste(img, box=(i%cols*w, i//cols*h))
+        grid.paste(img, box=(i % cols * w, i // cols * h))
     return grid
 ```
 
@@ -48,10 +49,7 @@ repo_id_embeds = "sd-concepts-library/cat-toy"
 
 Now you can load a pipeline, and pass the pre-learned concept to it:
 
 ```py
-pipeline = StableDiffusionPipeline.from_pretrained(
-    pretrained_model_name_or_path,
-    torch_dtype=torch.float16
-  ).to("cuda")
+pipeline = StableDiffusionPipeline.from_pretrained(pretrained_model_name_or_path, torch_dtype=torch.float16).to("cuda")
 
 pipeline.load_textual_inversion(repo_id_embeds)
 ```
@@ -68,7 +66,7 @@ num_rows = 2
 ```
 
 Then run the pipeline (feel free to adjust the parameters like `num_inference_steps` and `guidance_scale` to see how they affect image quality), save the generated images and visualize them with the helper function you created at the beginning:
 
 ```py
-all_images = [] 
+all_images = []
 for _ in range(num_rows):
     images = pipeline(prompt, num_images_per_prompt=num_samples, num_inference_steps=50, guidance_scale=7.5).images
     all_images.extend(images)
 
 grid = image_grid(all_images, num_rows, num_samples)
 grid
 ```
- +