28 changes: 0 additions & 28 deletions tests/models/test_models_unet_2d.py
@@ -15,7 +15,6 @@

import gc
import math
import tracemalloc
import unittest

import torch
@@ -155,33 +154,6 @@ def test_from_pretrained_accelerate_wont_change_results(self):

assert torch_all_close(arr_accelerate, arr_normal_load, rtol=1e-3)

@unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU")
def test_memory_footprint_gets_reduced(self):
torch.cuda.empty_cache()
gc.collect()

tracemalloc.start()
# by default, model loading will use accelerate, as `low_cpu_mem_usage=True`
model_accelerate, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True)
model_accelerate.to(torch_device)
model_accelerate.eval()
_, peak_accelerate = tracemalloc.get_traced_memory()

del model_accelerate
torch.cuda.empty_cache()
gc.collect()

model_normal_load, _ = UNet2DModel.from_pretrained(
"fusing/unet-ldm-dummy-update", output_loading_info=True, low_cpu_mem_usage=False
)
model_normal_load.to(torch_device)
model_normal_load.eval()
_, peak_normal = tracemalloc.get_traced_memory()

tracemalloc.stop()

assert peak_accelerate < peak_normal

def test_output_pretrained(self):
model = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update")
model.eval()
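The deleted test above compared peak host memory between the default accelerate-backed loading path and plain loading. A minimal sketch of the two paths it exercised, using the same dummy checkpoint (assuming a diffusers version where `low_cpu_mem_usage` defaults to `True`):

```python
from diffusers import UNet2DModel

# Default path: weights are materialized through accelerate, which keeps
# peak host memory low (equivalent to passing low_cpu_mem_usage=True).
model_accelerate = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update")

# Legacy path: the full state dict is read into host memory first.
model_normal = UNet2DModel.from_pretrained(
    "fusing/unet-ldm-dummy-update", low_cpu_mem_usage=False
)
```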
8 changes: 4 additions & 4 deletions tests/pipelines/dit/test_dit.py
@@ -125,8 +125,8 @@ def test_dit_256(self):
)
assert np.abs((expected_image - image).max()) < 1e-3

def test_dit_512_fp16(self):
pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-512", torch_dtype=torch.float16)
def test_dit_512(self):
pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-512")
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")

@@ -139,7 +139,7 @@ def test_dit_512_fp16(self):
for word, image in zip(words, images):
expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
f"/dit/{word}_fp16.npy"
f"/dit/{word}_512.npy"
)

assert np.abs((expected_image - image).max()) < 7.5e-1
assert np.abs((expected_image - image).max()) < 1e-1
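The rewritten test runs DiT-XL-2-512 in full precision against new `*_512.npy` references and tightens the tolerance from 7.5e-1 to 1e-1. A hedged sketch of the same pipeline setup (the label comes from the original test's word list; the step count is illustrative):

```python
import torch
from diffusers import DiTPipeline, DPMSolverMultistepScheduler

pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-512")
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")

# Map human-readable ImageNet labels to class ids, then sample.
class_ids = pipe.get_label_ids(["white shark"])
generator = torch.manual_seed(0)
image = pipe(
    class_labels=class_ids, generator=generator, num_inference_steps=25
).images[0]
```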
@@ -118,7 +118,6 @@ def test_inference_superresolution(self):
init_image = init_image.resize((64, 64), resample=PIL_INTERPOLATION["lanczos"])

ldm = LDMSuperResolutionPipeline.from_pretrained("duongna/ldm-super-resolution", device_map="auto")
ldm.to(torch_device)
ldm.set_progress_bar_config(disable=None)

generator = torch.manual_seed(0)
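The removed `.to(torch_device)` call was redundant here: with `device_map="auto"`, accelerate already decides where each weight lives when the pipeline is instantiated. A minimal sketch of the loading call the test keeps:

```python
from diffusers import LDMSuperResolutionPipeline

# device_map="auto" lets accelerate place the weights, so no explicit
# .to(device) is needed afterwards.
ldm = LDMSuperResolutionPipeline.from_pretrained(
    "duongna/ldm-super-resolution", device_map="auto"
)
```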
60 changes: 29 additions & 31 deletions tests/pipelines/stable_diffusion/test_stable_diffusion.py
@@ -35,6 +35,7 @@
UNet2DConditionModel,
logging,
)
from diffusers.models.attention_processor import AttnProcessor
from diffusers.utils import load_numpy, nightly, slow, torch_device
from diffusers.utils.testing_utils import CaptureLogger, require_torch_gpu

@@ -698,7 +699,6 @@ def test_stable_diffusion_vae_tiling(self):
torch.cuda.reset_peak_memory_stats()
model_id = "CompVis/stable-diffusion-v1-4"
pipe = StableDiffusionPipeline.from_pretrained(model_id, revision="fp16", torch_dtype=torch.float16)
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing()
pipe.unet = pipe.unet.to(memory_format=torch.channels_last)
@@ -708,42 +708,36 @@

# enable vae tiling
pipe.enable_vae_tiling()
generator = torch.Generator(device=torch_device).manual_seed(0)
with torch.autocast(torch_device):
output_chunked = pipe(
[prompt],
width=640,
height=640,
generator=generator,
guidance_scale=7.5,
num_inference_steps=2,
output_type="numpy",
)
image_chunked = output_chunked.images
pipe.enable_model_cpu_offload()
generator = torch.Generator(device="cpu").manual_seed(0)
output_chunked = pipe(
[prompt],
width=1024,
height=1024,
generator=generator,
guidance_scale=7.5,
num_inference_steps=2,
output_type="numpy",
)
image_chunked = output_chunked.images

mem_bytes = torch.cuda.max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
# make sure that less than 4 GB is allocated
assert mem_bytes < 4e9

# disable vae tiling
pipe.disable_vae_tiling()
generator = torch.Generator(device=torch_device).manual_seed(0)
with torch.autocast(torch_device):
output = pipe(
[prompt],
width=640,
height=640,
generator=generator,
guidance_scale=7.5,
num_inference_steps=2,
output_type="numpy",
)
image = output.images
generator = torch.Generator(device="cpu").manual_seed(0)
output = pipe(
[prompt],
width=1024,
height=1024,
generator=generator,
guidance_scale=7.5,
num_inference_steps=2,
output_type="numpy",
)
image = output.images

# make sure that more than 5 GB but less than 10 GB is allocated
mem_bytes = torch.cuda.max_memory_allocated()
assert mem_bytes > 5e9
assert mem_bytes < 1e10
assert np.abs(image_chunked.flatten() - image.flatten()).max() < 1e-2

def test_stable_diffusion_fp16_vs_autocast(self):
@@ -849,6 +843,7 @@ def test_stable_diffusion_pipeline_with_model_offloading(self):
"CompVis/stable-diffusion-v1-4",
torch_dtype=torch.float16,
)
pipe.unet.set_attn_processor(AttnProcessor())
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
outputs = pipe(**inputs)
@@ -861,13 +856,16 @@ def test_stable_diffusion_pipeline_with_model_offloading(self):
"CompVis/stable-diffusion-v1-4",
torch_dtype=torch.float16,
)
pipe.unet.set_attn_processor(AttnProcessor())

torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()

pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
inputs = self.get_inputs(torch_device, dtype=torch.float16)

outputs_offloaded = pipe(**inputs)
mem_bytes_offloaded = torch.cuda.max_memory_allocated()

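The tiling test now exercises model CPU offloading instead of autocast, renders at 1024×1024 instead of 640×640, and seeds a CPU generator, since with offloading the latents originate on the host. A condensed sketch of that configuration (the prompt is a placeholder; the model id, sizes, and call parameters mirror the test above):

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16
)
pipe.enable_attention_slicing()
pipe.enable_vae_tiling()
# Replaces pipe.to("cuda"): submodules are moved to the GPU only while needed.
pipe.enable_model_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(0)
image = pipe(
    "a photo of an astronaut riding a horse",
    width=1024,
    height=1024,
    guidance_scale=7.5,
    num_inference_steps=2,
    generator=generator,
).images[0]
```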
@@ -214,7 +214,7 @@ def test_stable_diffusion_inpaint_fp16(self):
image_slice = image[0, 253:256, 253:256, -1].flatten()

assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.1443, 0.1218, 0.1587, 0.1594, 0.1411, 0.1284, 0.1370, 0.1506, 0.2339])
expected_slice = np.array([0.1350, 0.1123, 0.1350, 0.1641, 0.1328, 0.1230, 0.1289, 0.1531, 0.1687])

assert np.abs(expected_slice - image_slice).max() < 5e-2

@@ -339,4 +339,4 @@ def test_stable_diffusion_panorama_pipeline_with_sequential_cpu_offloading(self)

mem_bytes = torch.cuda.max_memory_allocated()
# make sure that less than 5.5 GB is allocated
assert mem_bytes < 5.2 * 10**9
assert mem_bytes < 5.5 * 10**9
@@ -361,7 +361,7 @@ def test_stable_diffusion_pix2pix_inversion(self):
image_slice = inv_latents[0, -3:, -3:, -1].flatten()

assert inv_latents.shape == (1, 4, 64, 64)
expected_slice = np.array([0.8877, 0.0587, 0.7700, -1.6035, -0.5962, 0.4827, -0.6265, 1.0498, -0.8599])
expected_slice = np.array([0.8447, -0.0730, 0.7588, -1.2070, -0.4678, 0.1511, -0.8555, 1.1816, -0.7666])

assert np.abs(expected_slice - image_slice.cpu().numpy()).max() < 5e-2

@@ -383,7 +383,7 @@ def test_stable_diffusion_2_pix2pix_inversion(self):
image_slice = inv_latents[0, -3:, -3:, -1].flatten()

assert inv_latents.shape == (1, 4, 64, 64)
expected_slice = np.array([0.7515, -0.2397, 0.4922, -0.9736, -0.7031, 0.4846, -1.0781, 1.1309, -0.6973])
expected_slice = np.array([0.8970, -0.1611, 0.4766, -1.1162, -0.5923, 0.1050, -0.9678, 1.0537, -0.6050])

assert np.abs(expected_slice - image_slice.cpu().numpy()).max() < 5e-2

4 changes: 4 additions & 0 deletions tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
@@ -32,6 +32,7 @@
UNet2DConditionModel,
logging,
)
from diffusers.models.attention_processor import AttnProcessor
from diffusers.utils import load_numpy, nightly, slow, torch_device
from diffusers.utils.testing_utils import CaptureLogger, require_torch_gpu

@@ -409,6 +410,7 @@ def test_stable_diffusion_pipeline_with_model_offloading(self):
"stabilityai/stable-diffusion-2-base",
torch_dtype=torch.float16,
)
pipe.unet.set_attn_processor(AttnProcessor())
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
outputs = pipe(**inputs)
@@ -421,13 +423,15 @@ def test_stable_diffusion_pipeline_with_model_offloading(self):
"stabilityai/stable-diffusion-2-base",
torch_dtype=torch.float16,
)
pipe.unet.set_attn_processor(AttnProcessor())

torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()

pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
inputs = self.get_inputs(torch_device, dtype=torch.float16)
outputs_offloaded = pipe(**inputs)
mem_bytes_offloaded = torch.cuda.max_memory_allocated()

@@ -358,5 +358,5 @@ def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
)

mem_bytes = torch.cuda.max_memory_allocated()
# make sure that less than 2.65 GB is allocated
assert mem_bytes < 2.65 * 10**9
# make sure that less than 2.9 GB is allocated
assert mem_bytes < 2.9 * 10**9
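These offloading tests share one measurement pattern: clear the CUDA peak-memory counter, run the pipeline, then assert on the high-water mark, which is why the relaxed thresholds above are the substance of the change. A minimal sketch of that pattern (the pipeline call itself is elided):

```python
import torch

torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

# ... run the offloaded pipeline here ...

# max_memory_allocated() reports the peak since the last reset.
mem_bytes = torch.cuda.max_memory_allocated()
assert mem_bytes < 2.9 * 10**9
```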
@@ -21,17 +21,13 @@
import torch

from diffusers import VersatileDiffusionDualGuidedPipeline
from diffusers.utils.testing_utils import load_image, require_torch_gpu, slow, torch_device
from diffusers.utils.testing_utils import load_image, nightly, require_torch_gpu, torch_device


torch.backends.cuda.matmul.allow_tf32 = False


class VersatileDiffusionDualGuidedPipelineFastTests(unittest.TestCase):
pass


@slow
@nightly
@require_torch_gpu
class VersatileDiffusionDualGuidedPipelineIntegrationTests(unittest.TestCase):
def tearDown(self):
@@ -21,7 +21,7 @@
import torch

from diffusers import VersatileDiffusionPipeline
from diffusers.utils.testing_utils import load_image, require_torch_gpu, slow, torch_device
from diffusers.utils.testing_utils import load_image, nightly, require_torch_gpu, torch_device


torch.backends.cuda.matmul.allow_tf32 = False
@@ -31,7 +31,7 @@ class VersatileDiffusionMegaPipelineFastTests(unittest.TestCase):
pass


@slow
@nightly
@require_torch_gpu
class VersatileDiffusionMegaPipelineIntegrationTests(unittest.TestCase):
def tearDown(self):
2 changes: 1 addition & 1 deletion tests/test_ema.py
@@ -153,4 +153,4 @@ def test_serialization(self):
output = unet(noisy_latents, timesteps, encoder_hidden_states).sample
output_loaded = loaded_unet(noisy_latents, timesteps, encoder_hidden_states).sample

assert torch.allclose(output, output_loaded)
assert torch.allclose(output, output_loaded, atol=1e-4)
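`torch.allclose` defaults to `rtol=1e-5` and `atol=1e-8`, which is stricter than an EMA save/load round trip can reliably satisfy; the explicit `atol=1e-4` absorbs that drift. A toy illustration of the difference:

```python
import torch

a = torch.tensor([1.0, 2.0])
b = a + 5e-5  # drift on the order a serialization round trip can introduce

assert not torch.allclose(a, b)          # fails at the default tolerances
assert torch.allclose(a, b, atol=1e-4)   # passes with the relaxed atol
```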
9 changes: 9 additions & 0 deletions tests/test_modeling_common.py
@@ -25,6 +25,7 @@
from requests.exceptions import HTTPError

from diffusers.models import ModelMixin, UNet2DConditionModel
from diffusers.models.attention_processor import AttnProcessor
from diffusers.training_utils import EMAModel
from diffusers.utils import torch_device

@@ -105,12 +106,16 @@ def test_from_save_pretrained(self):
init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()

model = self.model_class(**init_dict)
if hasattr(model, "set_attn_processor"):
Review comment (Contributor Author): to reduce numerical instability
model.set_attn_processor(AttnProcessor())
model.to(torch_device)
model.eval()

with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname)
new_model = self.model_class.from_pretrained(tmpdirname)
if hasattr(new_model, "set_attn_processor"):
new_model.set_attn_processor(AttnProcessor())
new_model.to(torch_device)

with torch.no_grad():
@@ -135,12 +140,16 @@ def test_from_save_pretrained_variant(self):
init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()

model = self.model_class(**init_dict)
if hasattr(model, "set_attn_processor"):
model.set_attn_processor(AttnProcessor())
model.to(torch_device)
model.eval()

with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, variant="fp16")
new_model = self.model_class.from_pretrained(tmpdirname, variant="fp16")
if hasattr(new_model, "set_attn_processor"):
new_model.set_attn_processor(AttnProcessor())

# non-variant cannot be loaded
with self.assertRaises(OSError) as error_context:
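Both hunks above pin the plain `AttnProcessor` before comparing saved and reloaded outputs; per the review note, this reduces numerical instability from faster, less deterministic attention backends. The guarded call works for any model class; a sketch of the pattern, with an illustrative checkpoint not taken from this diff:

```python
from diffusers import UNet2DConditionModel
from diffusers.models.attention_processor import AttnProcessor

model = UNet2DConditionModel.from_pretrained(
    "hf-internal-testing/tiny-stable-diffusion-torch", subfolder="unet"
)
# Not every ModelMixin subclass has attention, hence the hasattr guard.
if hasattr(model, "set_attn_processor"):
    model.set_attn_processor(AttnProcessor())
```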
2 changes: 1 addition & 1 deletion tests/test_pipelines.py
@@ -1123,7 +1123,7 @@ def test_weighted_prompts_compel(self):
f"/compel/forest_{i}.npy"
)

assert np.abs(image - expected_image).max() < 1e-3
assert np.abs(image - expected_image).max() < 1e-2


@nightly