From 7f32aac9b949ed6f4fdc2fbba9e2027ef416f42c Mon Sep 17 00:00:00 2001 From: hyeminan Date: Sun, 16 Jul 2023 16:05:20 +0900 Subject: [PATCH 1/4] stable_diffusion_jax --- .../stable_diffusion_jax_how_to.mdx | 252 ++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 docs/source/ko/using-diffusers/stable_diffusion_jax_how_to.mdx diff --git a/docs/source/ko/using-diffusers/stable_diffusion_jax_how_to.mdx b/docs/source/ko/using-diffusers/stable_diffusion_jax_how_to.mdx new file mode 100644 index 000000000000..808c0505ab67 --- /dev/null +++ b/docs/source/ko/using-diffusers/stable_diffusion_jax_how_to.mdx @@ -0,0 +1,252 @@ +# 🧨 Stable Diffusion in JAX / Flax ! + +[[open-in-colab]] + +πŸ€— Hugging Face [Diffusers](https://github.com/huggingface/diffusers)λŠ” 버전 0.5.1λΆ€ν„° Flaxλ₯Ό μ§€μ›ν•©λ‹ˆλ‹€! 이λ₯Ό 톡해 Colab, Kaggle, Google Cloud Platformμ—μ„œ μ‚¬μš©ν•  수 μžˆλŠ” κ²ƒμ²˜λŸΌ Google TPUμ—μ„œ μ΄ˆκ³ μ† 좔둠이 κ°€λŠ₯ν•©λ‹ˆλ‹€. + +이 λ…ΈνŠΈλΆμ€ JAX / Flaxλ₯Ό μ‚¬μš©ν•΄ 좔둠을 μ‹€ν–‰ν•˜λŠ” 방법을 λ³΄μ—¬μ€λ‹ˆλ‹€. Stable Diffusion의 μž‘λ™ 방식에 λŒ€ν•œ μžμ„Έν•œ λ‚΄μš©μ„ μ›ν•˜κ±°λ‚˜ GPUμ—μ„œ μ‹€ν–‰ν•˜λ €λ©΄ 이 [λ…ΈνŠΈλΆ](https://huggingface.co/docs/diffusers/stable_diffusion)을 μ°Έμ‘°ν•˜μ„Έμš”. + +λ¨Όμ €, TPU λ°±μ—”λ“œλ₯Ό μ‚¬μš©ν•˜κ³  μžˆλŠ”μ§€ ν™•μΈν•©λ‹ˆλ‹€. Colabμ—μ„œ 이 λ…ΈνŠΈλΆμ„ μ‹€ν–‰ν•˜λŠ” 경우, λ©”λ‰΄μ—μ„œ λŸ°νƒ€μž„μ„ μ„ νƒν•œ λ‹€μŒ "λŸ°νƒ€μž„ μœ ν˜• λ³€κ²½" μ˜΅μ…˜μ„ μ„ νƒν•œ λ‹€μŒ ν•˜λ“œμ›¨μ–΄ 가속기 μ„€μ •μ—μ„œ TPUλ₯Ό μ„ νƒν•©λ‹ˆλ‹€. + +JAXλŠ” TPU μ „μš©μ€ μ•„λ‹ˆμ§€λ§Œ 각 TPU μ„œλ²„μ—λŠ” 8개의 TPU 가속기가 λ³‘λ ¬λ‘œ μž‘λ™ν•˜κΈ° λ•Œλ¬Έμ— ν•΄λ‹Ή ν•˜λ“œμ›¨μ–΄μ—μ„œ 더 빛을 λ°œν•œλ‹€λŠ” 점은 μ•Œμ•„λ‘μ„Έμš”. + + +## Setup + +λ¨Όμ € diffusersκ°€ μ„€μΉ˜λ˜μ–΄ μžˆλŠ”μ§€ ν™•μΈν•©λ‹ˆλ‹€. 
+ +```bash +!pip install jax==0.3.25 jaxlib==0.3.25 flax transformers ftfy +!pip install diffusers +``` + +```python +import jax.tools.colab_tpu + +jax.tools.colab_tpu.setup_tpu() +import jax +``` + +```python +num_devices = jax.device_count() +device_type = jax.devices()[0].device_kind + +print(f"Found {num_devices} JAX devices of type {device_type}.") +assert ( + "TPU" in device_type +), "Available device is not a TPU, please select TPU from Edit > Notebook settings > Hardware accelerator" +``` + +```python out +Found 8 JAX devices of type Cloud TPU. +``` + +그런 λ‹€μŒ λͺ¨λ“  dependenciesλ₯Ό κ°€μ Έμ˜΅λ‹ˆλ‹€. + +```python +import numpy as np +import jax +import jax.numpy as jnp + +from pathlib import Path +from jax import pmap +from flax.jax_utils import replicate +from flax.training.common_utils import shard +from PIL import Image + +from huggingface_hub import notebook_login +from diffusers import FlaxStableDiffusionPipeline +``` + +## λͺ¨λΈ 뢈러였기 + +TPU μž₯μΉ˜λŠ” 효율적인 half-float μœ ν˜•μΈ bfloat16을 μ§€μ›ν•©λ‹ˆλ‹€. ν…ŒμŠ€νŠΈμ—λŠ” 이 μœ ν˜•μ„ μ‚¬μš©ν•˜μ§€λ§Œ λŒ€μ‹  float32λ₯Ό μ‚¬μš©ν•˜μ—¬ 전체 정밀도(full precision)λ₯Ό μ‚¬μš©ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€. + +```python +dtype = jnp.bfloat16 +``` + +FlaxλŠ” ν•¨μˆ˜ν˜• ν”„λ ˆμž„μ›Œν¬μ΄λ―€λ‘œ λͺ¨λΈμ€ λ¬΄μƒνƒœ(stateless)ν˜•μ΄λ©° λ§€κ°œλ³€μˆ˜λŠ” λͺ¨λΈ 외뢀에 μ €μž₯λ©λ‹ˆλ‹€. μ‚¬μ „ν•™μŠ΅λœ Flax νŒŒμ΄ν”„λΌμΈμ„ 뢈러였면 νŒŒμ΄ν”„λΌμΈ μžμ²΄μ™€ λͺ¨λΈ κ°€μ€‘μΉ˜(λ˜λŠ” λ§€κ°œλ³€μˆ˜)κ°€ λͺ¨λ‘ λ°˜ν™˜λ©λ‹ˆλ‹€. μ €ν¬λŠ” bf16 λ²„μ „μ˜ κ°€μ€‘μΉ˜λ₯Ό μ‚¬μš©ν•˜κ³  μžˆμœΌλ―€λ‘œ μœ ν˜• κ²½κ³ κ°€ ν‘œμ‹œλ˜μ§€λ§Œ λ¬΄μ‹œν•΄λ„ λ©λ‹ˆλ‹€. + +```python +pipeline, params = FlaxStableDiffusionPipeline.from_pretrained( + "CompVis/stable-diffusion-v1-4", + revision="bf16", + dtype=dtype, +) +``` + +## μΆ”λ‘  + +TPUμ—λŠ” 일반적으둜 8개의 λ””λ°”μ΄μŠ€κ°€ λ³‘λ ¬λ‘œ μž‘λ™ν•˜λ―€λ‘œ λ³΄μœ ν•œ λ””λ°”μ΄μŠ€ 수만큼 ν”„λ‘¬ν”„νŠΈλ₯Ό λ³΅μ œν•©λ‹ˆλ‹€. 그런 λ‹€μŒ 각각 ν•˜λ‚˜μ˜ 이미지 생성을 λ‹΄λ‹Ήν•˜λŠ” 8개의 λ””λ°”μ΄μŠ€μ—μ„œ ν•œ λ²ˆμ— 좔둠을 μˆ˜ν–‰ν•©λ‹ˆλ‹€. 
λ”°λΌμ„œ ν•˜λ‚˜μ˜ 칩이 ν•˜λ‚˜μ˜ 이미지λ₯Ό μƒμ„±ν•˜λŠ” 데 κ±Έλ¦¬λŠ” μ‹œκ°„κ³Ό λ™μΌν•œ μ‹œκ°„μ— 8개의 이미지λ₯Ό 얻을 수 μžˆμŠ΅λ‹ˆλ‹€. + +ν”„λ‘¬ν”„νŠΈλ₯Ό λ³΅μ œν•˜κ³  λ‚˜λ©΄ νŒŒμ΄ν”„λΌμΈμ˜ `prepare_inputs` ν•¨μˆ˜λ₯Ό ν˜ΈμΆœν•˜μ—¬ ν† ν°ν™”λœ ν…μŠ€νŠΈ IDλ₯Ό μ–»μŠ΅λ‹ˆλ‹€. ν† ν°ν™”λœ ν…μŠ€νŠΈμ˜ κΈΈμ΄λŠ” κΈ°λ³Έ CLIP ν…μŠ€νŠΈ λͺ¨λΈμ˜ ꡬ성에 따라 77ν† ν°μœΌλ‘œ μ„€μ •λ©λ‹ˆλ‹€. + +```python +prompt = "A cinematic film still of Morgan Freeman starring as Jimi Hendrix, portrait, 40mm lens, shallow depth of field, close up, split lighting, cinematic" +prompt = [prompt] * jax.device_count() +prompt_ids = pipeline.prepare_inputs(prompt) +prompt_ids.shape +``` + +```python out +(8, 77) +``` + +### 볡사(Replication) 및 μ •λ ¬ν™” + +λͺ¨λΈ λ§€κ°œλ³€μˆ˜μ™€ μž…λ ₯값은 μš°λ¦¬κ°€ λ³΄μœ ν•œ 8개의 병렬 μž₯μΉ˜μ— 볡사(Replication)λ˜μ–΄μ•Ό ν•©λ‹ˆλ‹€. λ§€κ°œλ³€μˆ˜ λ”•μ…”λ„ˆλ¦¬λŠ” `flax.jax_utils.replicate`(λ”•μ…”λ„ˆλ¦¬λ₯Ό μˆœνšŒν•˜λ©° κ°€μ€‘μΉ˜μ˜ λͺ¨μ–‘을 λ³€κ²½ν•˜μ—¬ 8번 λ°˜λ³΅ν•˜λŠ” ν•¨μˆ˜)λ₯Ό μ‚¬μš©ν•˜μ—¬ λ³΅μ‚¬λ©λ‹ˆλ‹€. 배열은 `shard`λ₯Ό μ‚¬μš©ν•˜μ—¬ λ³΅μ œλ©λ‹ˆλ‹€. + +```python +p_params = replicate(params) +``` + +```python +prompt_ids = shard(prompt_ids) +prompt_ids.shape +``` + +```python out +(8, 1, 77) +``` + +이 shape은 8개의 λ””λ°”μ΄μŠ€ 각각이 shape `(1, 77)`의 jnp 배열을 μž…λ ₯κ°’μœΌλ‘œ λ°›λŠ”λ‹€λŠ” μ˜λ―Έμž…λ‹ˆλ‹€. 즉 1은 λ””λ°”μ΄μŠ€λ‹Ή batch(배치) ν¬κΈ°μž…λ‹ˆλ‹€. λ©”λͺ¨λ¦¬κ°€ μΆ©λΆ„ν•œ TPUμ—μ„œλŠ” ν•œ λ²ˆμ— μ—¬λŸ¬ 이미지(μΉ©λ‹Ή)λ₯Ό μƒμ„±ν•˜λ €λŠ” 경우 1보닀 클 수 μžˆμŠ΅λ‹ˆλ‹€. + +이미지λ₯Ό 생성할 μ€€λΉ„κ°€ 거의 μ™„λ£Œλ˜μ—ˆμŠ΅λ‹ˆλ‹€! 이제 생성 ν•¨μˆ˜μ— 전달할 λ‚œμˆ˜ μƒμ„±κΈ°λ§Œ λ§Œλ“€λ©΄ λ©λ‹ˆλ‹€. 이것은 λ‚œμˆ˜λ₯Ό λ‹€λ£¨λŠ” λͺ¨λ“  ν•¨μˆ˜μ— λ‚œμˆ˜ 생성기가 μžˆμ–΄μ•Ό ν•œλ‹€λŠ”, λ‚œμˆ˜μ— λŒ€ν•΄ 맀우 μ§„μ§€ν•˜κ³  독단적인 Flax의 ν‘œμ€€ μ ˆμ°¨μž…λ‹ˆλ‹€. μ΄λ ‡κ²Œ ν•˜λ©΄ μ—¬λŸ¬ λΆ„μ‚°λœ κΈ°κΈ°μ—μ„œ ν›ˆλ ¨ν•  λ•Œμ—λ„ μž¬ν˜„μ„±μ΄ 보μž₯λ©λ‹ˆλ‹€. + +μ•„λž˜ λ„μš°λ―Έ ν•¨μˆ˜λŠ” μ‹œλ“œλ₯Ό μ‚¬μš©ν•˜μ—¬ λ‚œμˆ˜ 생성기λ₯Ό μ΄ˆκΈ°ν™”ν•©λ‹ˆλ‹€. λ™μΌν•œ μ‹œλ“œλ₯Ό μ‚¬μš©ν•˜λŠ” ν•œ μ •ν™•νžˆ λ™μΌν•œ κ²°κ³Όλ₯Ό 얻을 수 μžˆμŠ΅λ‹ˆλ‹€. 
λ‚˜μ€‘μ— λ…ΈνŠΈλΆμ—μ„œ κ²°κ³Όλ₯Ό 탐색할 λ•Œμ—” λ‹€λ₯Έ μ‹œλ“œλ₯Ό 자유둭게 μ‚¬μš©ν•˜μ„Έμš”. + +```python +def create_key(seed=0): + return jax.random.PRNGKey(seed) +``` + +rngλ₯Ό 얻은 λ‹€μŒ 8번 'λΆ„ν• 'ν•˜μ—¬ 각 λ””λ°”μ΄μŠ€κ°€ λ‹€λ₯Έ μ œλ„ˆλ ˆμ΄ν„°λ₯Ό μˆ˜μ‹ ν•˜λ„λ‘ ν•©λ‹ˆλ‹€. λ”°λΌμ„œ 각 λ””λ°”μ΄μŠ€λ§ˆλ‹€ λ‹€λ₯Έ 이미지가 μƒμ„±λ˜λ©° 전체 ν”„λ‘œμ„ΈμŠ€λ₯Ό μž¬ν˜„ν•  수 μžˆμŠ΅λ‹ˆλ‹€. + +```python +rng = create_key(0) +rng = jax.random.split(rng, jax.device_count()) +``` + +JAX μ½”λ“œλŠ” 맀우 λΉ λ₯΄κ²Œ μ‹€ν–‰λ˜λŠ” 효율적인 ν‘œν˜„μœΌλ‘œ μ»΄νŒŒμΌν•  수 μžˆμŠ΅λ‹ˆλ‹€. ν•˜μ§€λ§Œ 후속 ν˜ΈμΆœμ—μ„œ λͺ¨λ“  μž…λ ₯이 λ™μΌν•œ λͺ¨μ–‘을 갖도둝 ν•΄μ•Ό ν•˜λ©°, κ·Έλ ‡μ§€ μ•ŠμœΌλ©΄ JAXκ°€ μ½”λ“œλ₯Ό λ‹€μ‹œ μ»΄νŒŒμΌν•΄μ•Ό ν•˜λ―€λ‘œ μ΅œμ ν™”λœ 속도λ₯Ό ν™œμš©ν•  수 μ—†μŠ΅λ‹ˆλ‹€. + +`jit = True`λ₯Ό 인수둜 μ „λ‹¬ν•˜λ©΄ Flax νŒŒμ΄ν”„λΌμΈμ΄ μ½”λ“œλ₯Ό μ»΄νŒŒμΌν•  수 μžˆμŠ΅λ‹ˆλ‹€. λ˜ν•œ λͺ¨λΈμ΄ μ‚¬μš© κ°€λŠ₯ν•œ 8개의 λ””λ°”μ΄μŠ€μ—μ„œ λ³‘λ ¬λ‘œ μ‹€ν–‰λ˜λ„λ‘ 보μž₯ν•©λ‹ˆλ‹€. + +λ‹€μŒ 셀을 처음 μ‹€ν–‰ν•˜λ©΄ μ»΄νŒŒμΌν•˜λŠ” 데 μ‹œκ°„μ΄ 였래 κ±Έλ¦¬μ§€λ§Œ 이후 호좜(μž…λ ₯이 λ‹€λ₯Έ κ²½μš°μ—λ„)은 훨씬 λΉ¨λΌμ§‘λ‹ˆλ‹€. 예λ₯Ό λ“€μ–΄, ν…ŒμŠ€νŠΈν–ˆμ„ λ•Œ TPU v2-8μ—μ„œ μ»΄νŒŒμΌν•˜λŠ” 데 1λΆ„ 이상 κ±Έλ¦¬μ§€λ§Œ 이후 μΆ”λ‘  μ‹€ν–‰μ—λŠ” μ•½ 7μ΄ˆκ°€ κ±Έλ¦½λ‹ˆλ‹€. + +``` +%%time +images = pipeline(prompt_ids, p_params, rng, jit=True)[0] +``` + +```python out +CPU times: user 56.2 s, sys: 42.5 s, total: 1min 38s +Wall time: 1min 29s +``` + +λ°˜ν™˜λœ λ°°μ—΄μ˜ shape은 `(8, 1, 512, 512, 3)`μž…λ‹ˆλ‹€. 이λ₯Ό μž¬κ΅¬μ„±ν•˜μ—¬ 두 번째 차원을 μ œκ±°ν•˜κ³  512 Γ— 512 Γ— 3의 이미지 8개λ₯Ό 얻은 λ‹€μŒ PIL둜 λ³€ν™˜ν•©λ‹ˆλ‹€. + +```python +images = images.reshape((images.shape[0] * images.shape[1],) + images.shape[-3:]) +images = pipeline.numpy_to_pil(images) +``` + +### μ‹œκ°ν™” + +이미지λ₯Ό κ·Έλ¦¬λ“œμ— ν‘œμ‹œν•˜λŠ” λ„μš°λ―Έ ν•¨μˆ˜λ₯Ό λ§Œλ“€μ–΄ λ³΄κ² μŠ΅λ‹ˆλ‹€. 
+ +```python +def image_grid(imgs, rows, cols): + w, h = imgs[0].size + grid = Image.new("RGB", size=(cols * w, rows * h)) + for i, img in enumerate(imgs): + grid.paste(img, box=(i % cols * w, i // cols * h)) + return grid +``` + +```python +image_grid(images, 2, 4) +``` + +![img](https://huggingface.co/datasets/YiYiXu/test-doc-assets/resolve/main/stable_diffusion_jax_how_to_cell_38_output_0.jpeg) + + +## λ‹€λ₯Έ ν”„λ‘¬ν”„νŠΈ μ‚¬μš© + +λͺ¨λ“  λ””λ°”μ΄μŠ€μ—μ„œ λ™μΌν•œ ν”„λ‘¬ν”„νŠΈλ₯Ό λ³΅μ œν•  ν•„μš”λŠ” μ—†μŠ΅λ‹ˆλ‹€. ν”„λ‘¬ν”„νŠΈ 2개λ₯Ό 각각 4λ²ˆμ”© μƒμ„±ν•˜κ±°λ‚˜ ν•œ λ²ˆμ— 8개의 μ„œλ‘œ λ‹€λ₯Έ ν”„λ‘¬ν”„νŠΈλ₯Ό μƒμ„±ν•˜λŠ” λ“± μ›ν•˜λŠ” 것은 무엇이든 ν•  수 μžˆμŠ΅λ‹ˆλ‹€. ν•œλ²ˆ ν•΄λ³΄μ„Έμš”! + +λ¨Όμ € μž…λ ₯ μ€€λΉ„ μ½”λ“œλ₯Ό νŽΈλ¦¬ν•œ ν•¨μˆ˜λ‘œ λ¦¬νŒ©ν„°λ§ν•˜κ² μŠ΅λ‹ˆλ‹€: + +```python +prompts = [ + "Labrador in the style of Hokusai", + "Painting of a squirrel skating in New York", + "HAL-9000 in the style of Van Gogh", + "Times Square under water, with fish and a dolphin swimming around", + "Ancient Roman fresco showing a man working on his laptop", + "Close-up photograph of young black woman against urban background, high quality, bokeh", + "Armchair in the shape of an avocado", + "Clown astronaut in space, with Earth in the background", +] +``` + +```python +prompt_ids = pipeline.prepare_inputs(prompts) +prompt_ids = shard(prompt_ids) + +images = pipeline(prompt_ids, p_params, rng, jit=True).images +images = images.reshape((images.shape[0] * images.shape[1],) + images.shape[-3:]) +images = pipeline.numpy_to_pil(images) + +image_grid(images, 2, 4) +``` + +![img](https://huggingface.co/datasets/YiYiXu/test-doc-assets/resolve/main/stable_diffusion_jax_how_to_cell_43_output_0.jpeg) + + +## 병렬화(parallelization)λŠ” μ–΄λ–»κ²Œ μž‘λ™ν•˜λŠ”κ°€? + +μ•žμ„œ `diffusers` Flax νŒŒμ΄ν”„λΌμΈμ΄ λͺ¨λΈμ„ μžλ™μœΌλ‘œ μ»΄νŒŒμΌν•˜κ³  μ‚¬μš© κ°€λŠ₯ν•œ λͺ¨λ“  κΈ°κΈ°μ—μ„œ λ³‘λ ¬λ‘œ μ‹€ν–‰ν•œλ‹€κ³  λ§μ”€λ“œλ ΈμŠ΅λ‹ˆλ‹€. 
이제 κ·Έ ν”„λ‘œμ„ΈμŠ€λ₯Ό κ°„λž΅ν•˜κ²Œ μ‚΄νŽ΄λ³΄κ³  μž‘λ™ 방식을 λ³΄μ—¬λ“œλ¦¬κ² μŠ΅λ‹ˆλ‹€. + +JAX λ³‘λ ¬ν™”λŠ” μ—¬λŸ¬ κ°€μ§€ λ°©λ²•μœΌλ‘œ μˆ˜ν–‰ν•  수 μžˆμŠ΅λ‹ˆλ‹€. κ°€μž₯ μ‰¬μš΄ 방법은 jax.pmap ν•¨μˆ˜λ₯Ό μ‚¬μš©ν•˜μ—¬ 단일 ν”„λ‘œκ·Έλž¨, 닀쀑 데이터(SPMD) 병렬화λ₯Ό λ‹¬μ„±ν•˜λŠ” κ²ƒμž…λ‹ˆλ‹€. 즉, λ™μΌν•œ μ½”λ“œμ˜ 볡사본을 각각 λ‹€λ₯Έ 데이터 μž…λ ₯에 λŒ€ν•΄ μ—¬λŸ¬ 개 μ‹€ν–‰ν•˜λŠ” κ²ƒμž…λ‹ˆλ‹€. 더 μ •κ΅ν•œ μ ‘κ·Ό 방식도 κ°€λŠ₯ν•˜λ―€λ‘œ 관심이 μžˆμœΌμ‹œλ‹€λ©΄ [JAX λ¬Έμ„œ](https://jax.readthedocs.io/en/latest/index.html)와 [`pjit` νŽ˜μ΄μ§€](https://jax.readthedocs.io/en/latest/jax-101/08-pjit.html?highlight=pjit)μ—μ„œ 이 주제λ₯Ό μ‚΄νŽ΄λ³΄μ‹œκΈ° λ°”λžλ‹ˆλ‹€! + +`jax.pmap`은 두 κ°€μ§€ κΈ°λŠ₯을 μˆ˜ν–‰ν•©λ‹ˆλ‹€: + +- `jax.jit()`λ₯Ό ν˜ΈμΆœν•œ κ²ƒμ²˜λŸΌ μ½”λ“œλ₯Ό 컴파일(λ˜λŠ” `jit`)ν•©λ‹ˆλ‹€. 이 μž‘μ—…μ€ `pmap`을 ν˜ΈμΆœν•  λ•Œκ°€ μ•„λ‹ˆλΌ pmapped ν•¨μˆ˜κ°€ 처음 호좜될 λ•Œ μˆ˜ν–‰λ©λ‹ˆλ‹€. +- 컴파일된 μ½”λ“œκ°€ μ‚¬μš© κ°€λŠ₯ν•œ λͺ¨λ“  κΈ°κΈ°μ—μ„œ λ³‘λ ¬λ‘œ μ‹€ν–‰λ˜λ„λ‘ ν•©λ‹ˆλ‹€. + +μž‘λ™ 방식을 λ³΄μ—¬λ“œλ¦¬κΈ° μœ„ν•΄ 이미지 생성을 μ‹€ν–‰ν•˜λŠ” λΉ„κ³΅κ°œ λ©”μ„œλ“œμΈ νŒŒμ΄ν”„λΌμΈμ˜ `_generate` λ©”μ„œλ“œλ₯Ό `pmap`ν•©λ‹ˆλ‹€. 이 λ©”μ„œλ“œλŠ” ν–₯ν›„ `Diffusers` λ¦΄λ¦¬μŠ€μ—μ„œ 이름이 λ³€κ²½λ˜κ±°λ‚˜ 제거될 수 μžˆλ‹€λŠ” 점에 μœ μ˜ν•˜μ„Έμš”. + +```python +p_generate = pmap(pipeline._generate) +``` + +`pmap`을 μ‚¬μš©ν•œ ν›„ μ€€λΉ„λœ ν•¨μˆ˜ `p_generate`λŠ” κ°œλ…μ μœΌλ‘œ λ‹€μŒμ„ μˆ˜ν–‰ν•©λ‹ˆλ‹€: +* 각 μž₯μΉ˜μ—μ„œ κΈ°λ³Έ ν•¨μˆ˜ `pipeline._generate`의 볡사본을 ν˜ΈμΆœν•©λ‹ˆλ‹€. +* 각 μž₯μΉ˜μ— μž…λ ₯ 인수의 λ‹€λ₯Έ 뢀뢄을 λ³΄λƒ…λ‹ˆλ‹€. 이것이 λ°”λ‘œ 샀딩이 μ‚¬μš©λ˜λŠ” μ΄μœ μž…λ‹ˆλ‹€. 이 경우 `prompt_ids`의 shape은 `(8, 1, 77)`μž…λ‹ˆλ‹€. 이 배열은 8개둜 λΆ„ν• λ˜κ³  `_generate`의 각 볡사본은 `(1, 77)`의 shape을 κ°€μ§„ μž…λ ₯을 λ°›κ²Œ λ©λ‹ˆλ‹€. + +λ³‘λ ¬λ‘œ ν˜ΈμΆœλœλ‹€λŠ” 사싀을 μ™„μ „νžˆ λ¬΄μ‹œν•˜κ³  `_generate`λ₯Ό μ½”λ”©ν•  수 μžˆμŠ΅λ‹ˆλ‹€. batch(배치) 크기(이 μ˜ˆμ œμ—μ„œλŠ” `1`)와 μ½”λ“œμ— μ ν•©ν•œ μ°¨μ›λ§Œ μ‹ κ²½ μ“°λ©΄ 되며, λ³‘λ ¬λ‘œ μž‘λ™ν•˜κΈ° μœ„ν•΄ 아무것도 λ³€κ²½ν•  ν•„μš”κ°€ μ—†μŠ΅λ‹ˆλ‹€. 
+ +νŒŒμ΄ν”„λΌμΈ ν˜ΈμΆœμ„ μ‚¬μš©ν•  λ•Œμ™€ λ§ˆμ°¬κ°€μ§€λ‘œ, λ‹€μŒ 셀을 처음 μ‹€ν–‰ν•  λ•ŒλŠ” μ‹œκ°„μ΄ κ±Έλ¦¬μ§€λ§Œ κ·Έ μ΄ν›„μ—λŠ” 훨씬 λΉ¨λΌμ§‘λ‹ˆλ‹€. + +``` +%%time +images = p_generate(prompt_ids, p_params, rng) +images = images.block_until_ready() +images.shape +``` + +```python out +CPU times: user 1min 15s, sys: 18.2 s, total: 1min 34s +Wall time: 1min 15s +``` + +```python +images.shape +``` + +```python out +(8, 1, 512, 512, 3) +``` + +JAXλŠ” 비동기 λ””μŠ€νŒ¨μΉ˜λ₯Ό μ‚¬μš©ν•˜κ³  κ°€λŠ₯ν•œ ν•œ 빨리 μ œμ–΄κΆŒμ„ Python 루프에 λ°˜ν™˜ν•˜κΈ° λ•Œλ¬Έμ— μΆ”λ‘  μ‹œκ°„μ„ μ •ν™•ν•˜κ²Œ μΈ‘μ •ν•˜κΈ° μœ„ν•΄ `block_until_ready()`λ₯Ό μ‚¬μš©ν•©λ‹ˆλ‹€. 아직 κ΅¬μ²΄ν™”λ˜μ§€ μ•Šμ€ 계산 κ²°κ³Όλ₯Ό μ‚¬μš©ν•˜λ €λŠ” 경우 μžλ™μœΌλ‘œ 차단이 μˆ˜ν–‰λ˜λ―€λ‘œ μ½”λ“œμ—μ„œ 이 ν•¨μˆ˜λ₯Ό μ‚¬μš©ν•  ν•„μš”κ°€ μ—†μŠ΅λ‹ˆλ‹€. \ No newline at end of file From 7e56d6033f0495e22f84cd6b8667b597d2d88ad3 Mon Sep 17 00:00:00 2001 From: hyeminan Date: Sun, 30 Jul 2023 15:45:14 +0900 Subject: [PATCH 2/4] index_update --- docs/source/ko/index.mdx | 124 +++++++++++++++++++++++++-------------- 1 file changed, 79 insertions(+), 45 deletions(-) diff --git a/docs/source/ko/index.mdx b/docs/source/ko/index.mdx index d01dff5c5e00..59f31131fa7f 100644 --- a/docs/source/ko/index.mdx +++ b/docs/source/ko/index.mdx @@ -16,48 +16,82 @@ specific language governing permissions and limitations under the License.

-# 🧨 Diffusers - -πŸ€— DiffusersλŠ” μ‚¬μ „ν•™μŠ΅λœ λΉ„μ „ 및 μ˜€λ””μ˜€ ν™•μ‚° λͺ¨λΈμ„ μ œκ³΅ν•˜κ³ , μΆ”λ‘  및 ν•™μŠ΅μ„ μœ„ν•œ λͺ¨λ“ˆμ‹ 도ꡬ μƒμž 역할을 ν•©λ‹ˆλ‹€. - -보닀 μ •ν™•ν•˜κ²Œ, πŸ€— DiffusersλŠ” λ‹€μŒμ„ μ œκ³΅ν•©λ‹ˆλ‹€: - -- 단 λͺ‡ μ€„μ˜ μ½”λ“œλ‘œ 좔둠을 μ‹€ν–‰ν•  수 μžˆλŠ” μ΅œμ‹  ν™•μ‚° νŒŒμ΄ν”„λΌμΈμ„ μ œκ³΅ν•©λ‹ˆλ‹€. ([**Using Diffusers**](./using-diffusers/conditional_image_generation)λ₯Ό μ‚΄νŽ΄λ³΄μ„Έμš”) μ§€μ›λ˜λŠ” λͺ¨λ“  νŒŒμ΄ν”„λΌμΈκ³Ό ν•΄λ‹Ή 논문에 λŒ€ν•œ κ°œμš”λ₯Ό 보렀면 [**Pipelines**](#pipelines)을 μ‚΄νŽ΄λ³΄μ„Έμš”. -- μΆ”λ‘ μ—μ„œ 속도 vs ν’ˆμ§ˆμ˜ μ ˆμΆ©μ„ μœ„ν•΄ μƒν˜Έκ΅ν™˜μ μœΌλ‘œ μ‚¬μš©ν•  수 μžˆλŠ” λ‹€μ–‘ν•œ λ…Έμ΄μ¦ˆ μŠ€μΌ€μ€„λŸ¬λ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€. μžμ„Έν•œ λ‚΄μš©μ€ [**Schedulers**](./api/schedulers/overview)λ₯Ό μ°Έκ³ ν•˜μ„Έμš”. -- UNetκ³Ό 같은 μ—¬λŸ¬ μœ ν˜•μ˜ λͺ¨λΈμ„ end-to-end ν™•μ‚° μ‹œμŠ€ν…œμ˜ ꡬ성 μš”μ†Œλ‘œ μ‚¬μš©ν•  수 μžˆμŠ΅λ‹ˆλ‹€. μžμ„Έν•œ λ‚΄μš©μ€ [**Models**](./api/models)을 μ°Έκ³ ν•˜μ„Έμš”. -- κ°€μž₯ μΈκΈ°μžˆλŠ” ν™•μ‚° λͺ¨λΈ ν…ŒμŠ€ν¬λ₯Ό ν•™μŠ΅ν•˜λŠ” 방법을 λ³΄μ—¬μ£ΌλŠ” μ˜ˆμ œλ“€μ„ μ œκ³΅ν•©λ‹ˆλ‹€. μžμ„Έν•œ λ‚΄μš©μ€ [**Training**](./training/overview)λ₯Ό μ°Έκ³ ν•˜μ„Έμš”. - -## 🧨 Diffusers νŒŒμ΄ν”„λΌμΈ - -λ‹€μŒ ν‘œμ—λŠ” κ³΅μ‹œμ μœΌλ‘œ μ§€μ›λ˜λŠ” λͺ¨λ“  νŒŒμ΄ν”„λΌμΈ, κ΄€λ ¨ λ…Όλ¬Έ, 직접 μ‚¬μš©ν•΄ λ³Ό 수 μžˆλŠ” Colab λ…ΈνŠΈλΆ(μ‚¬μš© κ°€λŠ₯ν•œ 경우)이 μš”μ•½λ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€. 
- -| Pipeline | Paper | Tasks | Colab -|---|---|:---:|:---:| -| [alt_diffusion](./api/pipelines/alt_diffusion) | [**AltDiffusion**](https://arxiv.org/abs/2211.06679) | Image-to-Image Text-Guided Generation | -| [audio_diffusion](./api/pipelines/audio_diffusion) | [**Audio Diffusion**](https://github.com/teticio/audio-diffusion.git) | Unconditional Audio Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/teticio/audio-diffusion/blob/master/notebooks/audio_diffusion_pipeline.ipynb) -| [cycle_diffusion](./api/pipelines/cycle_diffusion) | [**Cycle Diffusion**](https://arxiv.org/abs/2210.05559) | Image-to-Image Text-Guided Generation | -| [dance_diffusion](./api/pipelines/dance_diffusion) | [**Dance Diffusion**](https://github.com/williamberman/diffusers.git) | Unconditional Audio Generation | -| [ddpm](./api/pipelines/ddpm) | [**Denoising Diffusion Probabilistic Models**](https://arxiv.org/abs/2006.11239) | Unconditional Image Generation | -| [ddim](./api/pipelines/ddim) | [**Denoising Diffusion Implicit Models**](https://arxiv.org/abs/2010.02502) | Unconditional Image Generation | -| [latent_diffusion](./api/pipelines/latent_diffusion) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752)| Text-to-Image Generation | -| [latent_diffusion](./api/pipelines/latent_diffusion) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752)| Super Resolution Image-to-Image | -| [latent_diffusion_uncond](./api/pipelines/latent_diffusion_uncond) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752) | Unconditional Image Generation | -| [paint_by_example](./api/pipelines/paint_by_example) | [**Paint by Example: Exemplar-based Image Editing with Diffusion Models**](https://arxiv.org/abs/2211.13227) | Image-Guided Image Inpainting | -| [pndm](./api/pipelines/pndm) | 
[**Pseudo Numerical Methods for Diffusion Models on Manifolds**](https://arxiv.org/abs/2202.09778) | Unconditional Image Generation | -| [score_sde_ve](./api/pipelines/score_sde_ve) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | -| [score_sde_vp](./api/pipelines/score_sde_vp) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | -| [stable_diffusion](./api/pipelines/stable_diffusion/text2img) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-to-Image Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) -| [stable_diffusion](./api/pipelines/stable_diffusion/img2img) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Image-to-Image Text-Guided Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) -| [stable_diffusion](./api/pipelines/stable_diffusion/inpaint) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-Guided Image Inpainting | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) -| [stable_diffusion_2](./api/pipelines/stable_diffusion_2) | [**Stable Diffusion 2**](https://stability.ai/blog/stable-diffusion-v2-release) | Text-to-Image Generation | -| [stable_diffusion_2](./api/pipelines/stable_diffusion_2) | [**Stable Diffusion 
2**](https://stability.ai/blog/stable-diffusion-v2-release) | Text-Guided Image Inpainting | -| [stable_diffusion_2](./api/pipelines/stable_diffusion_2) | [**Stable Diffusion 2**](https://stability.ai/blog/stable-diffusion-v2-release) | Text-Guided Super Resolution Image-to-Image | -| [stable_diffusion_safe](./api/pipelines/stable_diffusion_safe) | [**Safe Stable Diffusion**](https://arxiv.org/abs/2211.05105) | Text-Guided Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ml-research/safe-latent-diffusion/blob/main/examples/Safe%20Latent%20Diffusion.ipynb) -| [stochastic_karras_ve](./api/pipelines/stochastic_karras_ve) | [**Elucidating the Design Space of Diffusion-Based Generative Models**](https://arxiv.org/abs/2206.00364) | Unconditional Image Generation | -| [unclip](./api/pipelines/unclip) | [Hierarchical Text-Conditional Image Generation with CLIP Latents](https://arxiv.org/abs/2204.06125) | Text-to-Image Generation | -| [versatile_diffusion](./api/pipelines/versatile_diffusion) | [Versatile Diffusion: Text, Images and Variations All in One Diffusion Model](https://arxiv.org/abs/2211.08332) | Text-to-Image Generation | -| [versatile_diffusion](./api/pipelines/versatile_diffusion) | [Versatile Diffusion: Text, Images and Variations All in One Diffusion Model](https://arxiv.org/abs/2211.08332) | Image Variations Generation | -| [versatile_diffusion](./api/pipelines/versatile_diffusion) | [Versatile Diffusion: Text, Images and Variations All in One Diffusion Model](https://arxiv.org/abs/2211.08332) | Dual Image and Text Guided Generation | -| [vq_diffusion](./api/pipelines/vq_diffusion) | [Vector Quantized Diffusion Model for Text-to-Image Synthesis](https://arxiv.org/abs/2111.14822) | Text-to-Image Generation | - -**μ°Έκ³ **: νŒŒμ΄ν”„λΌμΈμ€ ν•΄λ‹Ή λ¬Έμ„œμ— μ„€λͺ…λœ λŒ€λ‘œ ν™•μ‚° μ‹œμŠ€ν…œμ„ μ‚¬μš©ν•œ 방법에 λŒ€ν•œ κ°„λ‹¨ν•œ μ˜ˆμž…λ‹ˆλ‹€. 
+ +# Diffusers + +πŸ€— DiffusersλŠ” 이미지, μ˜€λ””μ˜€, 심지어 λΆ„μžμ˜ 3D ꡬ쑰λ₯Ό μƒμ„±ν•˜κΈ° μœ„ν•œ μ΅œμ²¨λ‹¨ 사전 ν›ˆλ ¨λœ diffusion λͺ¨λΈμ„ μœ„ν•œ λΌμ΄λΈŒλŸ¬λ¦¬μž…λ‹ˆλ‹€. κ°„λ‹¨ν•œ μΆ”λ‘  μ†”λ£¨μ…˜μ„ μ°Ύκ³  μžˆλ“ , 자체 diffusion λͺ¨λΈμ„ ν›ˆλ ¨ν•˜κ³  μ‹Άλ“ , πŸ€— DiffusersλŠ” 두 κ°€μ§€ λͺ¨λ‘λ₯Ό μ§€μ›ν•˜λŠ” λͺ¨λ“ˆμ‹ νˆ΄λ°•μŠ€μž…λ‹ˆλ‹€. 저희 λΌμ΄λΈŒλŸ¬λ¦¬λŠ” [μ„±λŠ₯보닀 μ‚¬μš©μ„±](conceptual/philosophy#usability-over-performance), [κ°„νŽΈν•¨λ³΄λ‹€ λ‹¨μˆœν•¨](conceptual/philosophy#simple-over-easy), 그리고 [좔상화보닀 μ‚¬μš©μž μ§€μ • κ°€λŠ₯μ„±](conceptual/philosophy#tweakable-contributorfriendly-over-abstraction)에 쀑점을 두고 μ„€κ³„λ˜μ—ˆμŠ΅λ‹ˆλ‹€. + +이 λΌμ΄λΈŒλŸ¬λ¦¬μ—λŠ” μ„Έ κ°€μ§€ μ£Όμš” ꡬ성 μš”μ†Œκ°€ μžˆμŠ΅λ‹ˆλ‹€: + +- λͺ‡ μ€„μ˜ μ½”λ“œλ§ŒμœΌλ‘œ μΆ”λ‘ ν•  수 μžˆλŠ” μ΅œμ²¨λ‹¨ [diffusion νŒŒμ΄ν”„λΌμΈ](api/pipelines/overview). +- 생성 속도와 ν’ˆμ§ˆ κ°„μ˜ κ· ν˜•μ„ λ§žμΆ”κΈ° μœ„ν•΄ μƒν˜Έκ΅ν™˜μ μœΌλ‘œ μ‚¬μš©ν•  수 μžˆλŠ” [λ…Έμ΄μ¦ˆ μŠ€μΌ€μ€„λŸ¬](api/schedulers/overview). +- λΉŒλ”© λΈ”λ‘μœΌλ‘œ μ‚¬μš©ν•  수 있고 μŠ€μΌ€μ€„λŸ¬μ™€ κ²°ν•©ν•˜μ—¬ 자체적인 end-to-end diffusion μ‹œμŠ€ν…œμ„ λ§Œλ“€ 수 μžˆλŠ” 사전 ν•™μŠ΅λœ [λͺ¨λΈ](api/models). + +
+
+
Tutorials
+

좜λ ₯물을 μƒμ„±ν•˜κ³ , λ‚˜λ§Œμ˜ diffusion μ‹œμŠ€ν…œμ„ κ΅¬μΆ•ν•˜κ³ , ν™•μ‚° λͺ¨λΈμ„ ν›ˆλ ¨ν•˜λŠ” 데 ν•„μš”ν•œ κΈ°λ³Έ κΈ°μˆ μ„ λ°°μ›Œλ³΄μ„Έμš”. πŸ€— Diffusersλ₯Ό 처음 μ‚¬μš©ν•˜λŠ” 경우 μ—¬κΈ°μ—μ„œ μ‹œμž‘ν•˜λŠ” 것이 μ’‹μŠ΅λ‹ˆλ‹€!

+
+
How-to guides
+

νŒŒμ΄ν”„λΌμΈ, λͺ¨λΈ, μŠ€μΌ€μ€„λŸ¬λ₯Ό λ‘œλ“œν•˜λŠ” 데 도움이 λ˜λŠ” μ‹€μš©μ μΈ κ°€μ΄λ“œμž…λ‹ˆλ‹€. λ˜ν•œ νŠΉμ • μž‘μ—…μ— νŒŒμ΄ν”„λΌμΈμ„ μ‚¬μš©ν•˜κ³ , 좜λ ₯ 생성 방식을 μ œμ–΄ν•˜κ³ , μΆ”λ‘  속도에 맞게 μ΅œμ ν™”ν•˜κ³ , λ‹€μ–‘ν•œ ν•™μŠ΅ 기법을 μ‚¬μš©ν•˜λŠ” 방법도 배울 수 μžˆμŠ΅λ‹ˆλ‹€.

+
+
Conceptual guides
+

λΌμ΄λΈŒλŸ¬λ¦¬κ°€ μ™œ 이런 λ°©μ‹μœΌλ‘œ μ„€κ³„λ˜μ—ˆλŠ”μ§€ μ΄ν•΄ν•˜κ³ , 라이브러리 μ΄μš©μ— λŒ€ν•œ 윀리적 κ°€μ΄λ“œλΌμΈκ³Ό μ•ˆμ „ κ΅¬ν˜„μ— λŒ€ν•΄ μžμ„Ένžˆ μ•Œμ•„λ³΄μ„Έμš”.

+
+
Reference
+

πŸ€— Diffusers 클래슀 및 λ©”μ„œλ“œμ˜ μž‘λ™ 방식에 λŒ€ν•œ 기술 μ„€λͺ….

+
+
+
+ +## Supported pipelines + +| Pipeline | Paper/Repository | Tasks | +|---|---|:---:| +| [alt_diffusion](./api/pipelines/alt_diffusion) | [AltCLIP: Altering the Language Encoder in CLIP for Extended Language Capabilities](https://arxiv.org/abs/2211.06679) | Image-to-Image Text-Guided Generation | +| [audio_diffusion](./api/pipelines/audio_diffusion) | [Audio Diffusion](https://github.com/teticio/audio-diffusion.git) | Unconditional Audio Generation | +| [controlnet](./api/pipelines/stable_diffusion/controlnet) | [Adding Conditional Control to Text-to-Image Diffusion Models](https://arxiv.org/abs/2302.05543) | Image-to-Image Text-Guided Generation | +| [cycle_diffusion](./api/pipelines/cycle_diffusion) | [Unifying Diffusion Models' Latent Space, with Applications to CycleDiffusion and Guidance](https://arxiv.org/abs/2210.05559) | Image-to-Image Text-Guided Generation | +| [dance_diffusion](./api/pipelines/dance_diffusion) | [Dance Diffusion](https://github.com/williamberman/diffusers.git) | Unconditional Audio Generation | +| [ddpm](./api/pipelines/ddpm) | [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239) | Unconditional Image Generation | +| [ddim](./api/pipelines/ddim) | [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) | Unconditional Image Generation | +| [if](./if) | [**IF**](./api/pipelines/if) | Image Generation | +| [if_img2img](./if) | [**IF**](./api/pipelines/if) | Image-to-Image Generation | +| [if_inpainting](./if) | [**IF**](./api/pipelines/if) | Image-to-Image Generation | +| [latent_diffusion](./api/pipelines/latent_diffusion) | [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752)| Text-to-Image Generation | +| [latent_diffusion](./api/pipelines/latent_diffusion) | [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752)| Super Resolution Image-to-Image | +| 
[latent_diffusion_uncond](./api/pipelines/latent_diffusion_uncond) | [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) | Unconditional Image Generation | +| [paint_by_example](./api/pipelines/paint_by_example) | [Paint by Example: Exemplar-based Image Editing with Diffusion Models](https://arxiv.org/abs/2211.13227) | Image-Guided Image Inpainting | +| [pndm](./api/pipelines/pndm) | [Pseudo Numerical Methods for Diffusion Models on Manifolds](https://arxiv.org/abs/2202.09778) | Unconditional Image Generation | +| [score_sde_ve](./api/pipelines/score_sde_ve) | [Score-Based Generative Modeling through Stochastic Differential Equations](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | +| [score_sde_vp](./api/pipelines/score_sde_vp) | [Score-Based Generative Modeling through Stochastic Differential Equations](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | +| [semantic_stable_diffusion](./api/pipelines/semantic_stable_diffusion) | [Semantic Guidance](https://arxiv.org/abs/2301.12247) | Text-Guided Generation | +| [stable_diffusion_text2img](./api/pipelines/stable_diffusion/text2img) | [Stable Diffusion](https://stability.ai/blog/stable-diffusion-public-release) | Text-to-Image Generation | +| [stable_diffusion_img2img](./api/pipelines/stable_diffusion/img2img) | [Stable Diffusion](https://stability.ai/blog/stable-diffusion-public-release) | Image-to-Image Text-Guided Generation | +| [stable_diffusion_inpaint](./api/pipelines/stable_diffusion/inpaint) | [Stable Diffusion](https://stability.ai/blog/stable-diffusion-public-release) | Text-Guided Image Inpainting | +| [stable_diffusion_panorama](./api/pipelines/stable_diffusion/panorama) | [MultiDiffusion](https://multidiffusion.github.io/) | Text-to-Panorama Generation | +| [stable_diffusion_pix2pix](./api/pipelines/stable_diffusion/pix2pix) | [InstructPix2Pix: Learning to Follow Image Editing 
Instructions](https://arxiv.org/abs/2211.09800) | Text-Guided Image Editing| +| [stable_diffusion_pix2pix_zero](./api/pipelines/stable_diffusion/pix2pix_zero) | [Zero-shot Image-to-Image Translation](https://pix2pixzero.github.io/) | Text-Guided Image Editing | +| [stable_diffusion_attend_and_excite](./api/pipelines/stable_diffusion/attend_and_excite) | [Attend-and-Excite: Attention-Based Semantic Guidance for Text-to-Image Diffusion Models](https://arxiv.org/abs/2301.13826) | Text-to-Image Generation | +| [stable_diffusion_self_attention_guidance](./api/pipelines/stable_diffusion/self_attention_guidance) | [Improving Sample Quality of Diffusion Models Using Self-Attention Guidance](https://arxiv.org/abs/2210.00939) | Text-to-Image Generation Unconditional Image Generation | +| [stable_diffusion_image_variation](./stable_diffusion/image_variation) | [Stable Diffusion Image Variations](https://github.com/LambdaLabsML/lambda-diffusers#stable-diffusion-image-variations) | Image-to-Image Generation | +| [stable_diffusion_latent_upscale](./stable_diffusion/latent_upscale) | [Stable Diffusion Latent Upscaler](https://twitter.com/StabilityAI/status/1590531958815064065) | Text-Guided Super Resolution Image-to-Image | +| [stable_diffusion_model_editing](./api/pipelines/stable_diffusion/model_editing) | [Editing Implicit Assumptions in Text-to-Image Diffusion Models](https://time-diffusion.github.io/) | Text-to-Image Model Editing | +| [stable_diffusion_2](./api/pipelines/stable_diffusion_2) | [Stable Diffusion 2](https://stability.ai/blog/stable-diffusion-v2-release) | Text-to-Image Generation | +| [stable_diffusion_2](./api/pipelines/stable_diffusion_2) | [Stable Diffusion 2](https://stability.ai/blog/stable-diffusion-v2-release) | Text-Guided Image Inpainting | +| [stable_diffusion_2](./api/pipelines/stable_diffusion_2) | [Depth-Conditional Stable Diffusion](https://github.com/Stability-AI/stablediffusion#depth-conditional-stable-diffusion) | Depth-to-Image Generation | 
+| [stable_diffusion_2](./api/pipelines/stable_diffusion_2) | [Stable Diffusion 2](https://stability.ai/blog/stable-diffusion-v2-release) | Text-Guided Super Resolution Image-to-Image | +| [stable_diffusion_safe](./api/pipelines/stable_diffusion_safe) | [Safe Stable Diffusion](https://arxiv.org/abs/2211.05105) | Text-Guided Generation | +| [stable_unclip](./stable_unclip) | Stable unCLIP | Text-to-Image Generation | +| [stable_unclip](./stable_unclip) | Stable unCLIP | Image-to-Image Text-Guided Generation | +| [stochastic_karras_ve](./api/pipelines/stochastic_karras_ve) | [Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364) | Unconditional Image Generation | +| [text_to_video_sd](./api/pipelines/text_to_video) | [Modelscope's Text-to-video-synthesis Model in Open Domain](https://modelscope.cn/models/damo/text-to-video-synthesis/summary) | Text-to-Video Generation | +| [unclip](./api/pipelines/unclip) | [Hierarchical Text-Conditional Image Generation with CLIP Latents](https://arxiv.org/abs/2204.06125)(implementation by [kakaobrain](https://github.com/kakaobrain/karlo)) | Text-to-Image Generation | +| [versatile_diffusion](./api/pipelines/versatile_diffusion) | [Versatile Diffusion: Text, Images and Variations All in One Diffusion Model](https://arxiv.org/abs/2211.08332) | Text-to-Image Generation | +| [versatile_diffusion](./api/pipelines/versatile_diffusion) | [Versatile Diffusion: Text, Images and Variations All in One Diffusion Model](https://arxiv.org/abs/2211.08332) | Image Variations Generation | +| [versatile_diffusion](./api/pipelines/versatile_diffusion) | [Versatile Diffusion: Text, Images and Variations All in One Diffusion Model](https://arxiv.org/abs/2211.08332) | Dual Image and Text Guided Generation | +| [vq_diffusion](./api/pipelines/vq_diffusion) | [Vector Quantized Diffusion Model for Text-to-Image Synthesis](https://arxiv.org/abs/2111.14822) | Text-to-Image Generation | From 
cb1873cbcda54f198db1f1b57060f1d02a5851be Mon Sep 17 00:00:00 2001 From: hyeminan Date: Sun, 30 Jul 2023 15:53:18 +0900 Subject: [PATCH 3/4] index_update --- docs/source/ko/index.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/ko/index.mdx b/docs/source/ko/index.mdx index 59f31131fa7f..a83dd0d0b29e 100644 --- a/docs/source/ko/index.mdx +++ b/docs/source/ko/index.mdx @@ -31,7 +31,7 @@ specific language governing permissions and limitations under the License.
Tutorials
-

좜λ ₯물을 μƒμ„±ν•˜κ³ , λ‚˜λ§Œμ˜ diffusion μ‹œμŠ€ν…œμ„ κ΅¬μΆ•ν•˜κ³ , ν™•μ‚° λͺ¨λΈμ„ ν›ˆλ ¨ν•˜λŠ” 데 ν•„μš”ν•œ κΈ°λ³Έ κΈ°μˆ μ„ λ°°μ›Œλ³΄μ„Έμš”. πŸ€— Diffusersλ₯Ό 처음 μ‚¬μš©ν•˜λŠ” 경우 μ—¬κΈ°μ—μ„œ μ‹œμž‘ν•˜λŠ” 것이 μ’‹μŠ΅λ‹ˆλ‹€!

+

결과물을 μƒμ„±ν•˜κ³ , λ‚˜λ§Œμ˜ diffusion μ‹œμŠ€ν…œμ„ κ΅¬μΆ•ν•˜κ³ , ν™•μ‚° λͺ¨λΈμ„ ν›ˆλ ¨ν•˜λŠ” 데 ν•„μš”ν•œ κΈ°λ³Έ κΈ°μˆ μ„ λ°°μ›Œλ³΄μ„Έμš”. πŸ€— Diffusersλ₯Ό 처음 μ‚¬μš©ν•˜λŠ” 경우 μ—¬κΈ°μ—μ„œ μ‹œμž‘ν•˜λŠ” 것이 μ’‹μŠ΅λ‹ˆλ‹€!

How-to guides
From b33e21acb7ffd1afa7c0684ef7202e7b38b40aca Mon Sep 17 00:00:00 2001 From: hyeminan Date: Sun, 30 Jul 2023 16:46:24 +0900 Subject: [PATCH 4/4] condition_image_generation --- .../conditional_image_generation.mdx | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 docs/source/ko/using-diffusers/conditional_image_generation.mdx diff --git a/docs/source/ko/using-diffusers/conditional_image_generation.mdx b/docs/source/ko/using-diffusers/conditional_image_generation.mdx new file mode 100644 index 000000000000..5525ac990ca4 --- /dev/null +++ b/docs/source/ko/using-diffusers/conditional_image_generation.mdx @@ -0,0 +1,60 @@ + + +# 쑰건뢀 이미지 생성 + +[[open-in-colab]] + +쑰건뢀 이미지 생성을 μ‚¬μš©ν•˜λ©΄ ν…μŠ€νŠΈ ν”„λ‘¬ν”„νŠΈμ—μ„œ 이미지λ₯Ό 생성할 수 μžˆμŠ΅λ‹ˆλ‹€. ν…μŠ€νŠΈλŠ” μž„λ² λ”©μœΌλ‘œ λ³€ν™˜λ˜λ©°, μž„λ² λ”©μ€ λ…Έμ΄μ¦ˆμ—μ„œ 이미지λ₯Ό μƒμ„±ν•˜λ„λ‘ λͺ¨λΈμ„ μ‘°κ±΄ν™”ν•˜λŠ” 데 μ‚¬μš©λ©λ‹ˆλ‹€. + +[`DiffusionPipeline`]은 좔둠을 μœ„ν•΄ 사전 ν›ˆλ ¨λœ diffusion μ‹œμŠ€ν…œμ„ μ‚¬μš©ν•˜λŠ” κ°€μž₯ μ‰¬μš΄ λ°©λ²•μž…λ‹ˆλ‹€. + +λ¨Όμ € [`DiffusionPipeline`]의 μΈμŠ€ν„΄μŠ€λ₯Ό μƒμ„±ν•˜κ³  λ‹€μš΄λ‘œλ“œν•  νŒŒμ΄ν”„λΌμΈ [체크포인트](https://huggingface.co/models?library=diffusers&sort=downloads)λ₯Ό μ§€μ •ν•©λ‹ˆλ‹€. + +이 κ°€μ΄λ“œμ—μ„œλŠ” [잠재 Diffusion](https://huggingface.co/CompVis/ldm-text2im-large-256)κ³Ό ν•¨κ»˜ ν…μŠ€νŠΈ-이미지 생성에 [`DiffusionPipeline`]을 μ‚¬μš©ν•©λ‹ˆλ‹€: + +```python +>>> from diffusers import DiffusionPipeline + +>>> generator = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256") +``` + +[`DiffusionPipeline`]은 λͺ¨λ“  λͺ¨λΈλ§, 토큰화, μŠ€μΌ€μ€„λ§ ꡬ성 μš”μ†Œλ₯Ό λ‹€μš΄λ‘œλ“œν•˜κ³  μΊμ‹œν•©λ‹ˆλ‹€. +이 λͺ¨λΈμ€ μ•½ 14μ–΅ 개의 νŒŒλΌλ―Έν„°λ‘œ κ΅¬μ„±λ˜μ–΄ 있기 λ•Œλ¬Έμ— GPUμ—μ„œ μ‹€ν–‰ν•  것을 κ°•λ ₯히 ꢌμž₯ν•©λ‹ˆλ‹€. 
+PyTorchμ—μ„œμ™€ λ§ˆμ°¬κ°€μ§€λ‘œ 생성기 객체λ₯Ό GPU둜 이동할 수 μžˆμŠ΅λ‹ˆλ‹€: + +```python +>>> generator.to("cuda") +``` + +이제 ν…μŠ€νŠΈ ν”„λ‘¬ν”„νŠΈμ—μ„œ `generator`λ₯Ό μ‚¬μš©ν•  수 μžˆμŠ΅λ‹ˆλ‹€: + +```python +>>> image = generator("An image of a squirrel in Picasso style").images[0] +``` + +좜λ ₯값은 기본적으둜 [`PIL.Image`](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class) 객체둜 λž˜ν•‘λ©λ‹ˆλ‹€. + +ν˜ΈμΆœν•˜μ—¬ 이미지λ₯Ό μ €μž₯ν•  수 μžˆμŠ΅λ‹ˆλ‹€: + +```python +>>> image.save("image_of_squirrel_painting.png") +``` + +μ•„λž˜ 슀페이슀λ₯Ό μ‚¬μš©ν•΄λ³΄κ³  μ•ˆλ‚΄ 배율 λ§€κ°œλ³€μˆ˜λ₯Ό 자유둭게 μ‘°μ •ν•˜μ—¬ 이미지 ν’ˆμ§ˆμ— μ–΄λ–€ 영ν–₯을 λ―ΈμΉ˜λŠ”μ§€ 확인해 λ³΄μ„Έμš”! + + \ No newline at end of file