From 82fc2196fed35aa4bba2dfcb0ef93075954ee037 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 14 Sep 2022 18:15:18 +0200 Subject: [PATCH 1/5] [Download] Smart downloading --- src/diffusers/pipeline_utils.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index 84ee9e20f110..5646ffa5114a 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -30,6 +30,8 @@ from tqdm.auto import tqdm from .configuration_utils import ConfigMixin +from .modeling_utils import WEIGHTS_NAME +from .schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from .utils import DIFFUSERS_CACHE, BaseOutput, logging @@ -285,6 +287,21 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P # 1. Download the checkpoints and configs # use snapshot download here to get it working from from_pretrained if not os.path.isdir(pretrained_model_name_or_path): + config_dict = cls.get_config_dict( + pretrained_model_name_or_path, + cache_dir=cache_dir, + resume_download=resume_download, + proxies=proxies, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + revision=revision, + ) + # make sure we only download sub-folders and `diffusers` filenames + folder_names = [k for k in config_dict.keys() if not k.startswith("_")] + allow_patterns = [os.path.join(k, "*") for k in folder_names] + allow_patterns += [WEIGHTS_NAME, SCHEDULER_CONFIG_NAME, cls.config_name] + + # download all allow_patterns cached_folder = snapshot_download( pretrained_model_name_or_path, cache_dir=cache_dir, @@ -293,6 +310,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P local_files_only=local_files_only, use_auth_token=use_auth_token, revision=revision, + allow_patterns=allow_patterns, ) else: cached_folder = pretrained_model_name_or_path From ee589bb8f5a029b2d0ec4889f3a1c742c60ca288 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 14 Sep 2022 18:31:50 +0200 Subject: [PATCH 2/5] add test --- src/diffusers/pipeline_utils.py | 4 ++-- tests/test_pipelines.py | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index 5646ffa5114a..b13f41ed7190 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -32,7 +32,7 @@ from .configuration_utils import ConfigMixin from .modeling_utils import WEIGHTS_NAME from .schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME -from .utils import DIFFUSERS_CACHE, BaseOutput, logging +from .utils import DIFFUSERS_CACHE, BaseOutput, logging, CONFIG_NAME INDEX_FILE = "diffusion_pytorch_model.bin" @@ -299,7 +299,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P # make sure we only download sub-folders and `diffusers` filenames folder_names = [k for k in config_dict.keys() if not k.startswith("_")] allow_patterns = [os.path.join(k, "*") for k in folder_names] - allow_patterns += [WEIGHTS_NAME, SCHEDULER_CONFIG_NAME, cls.config_name] + allow_patterns += [WEIGHTS_NAME, SCHEDULER_CONFIG_NAME, CONFIG_NAME, cls.config_name] # download all allow_patterns cached_folder = snapshot_download( diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 3d691368104e..c7cde0432f36 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -708,6 +708,13 @@ def tearDown(self): gc.collect() torch.cuda.empty_cache() + def test_smart_download(self): + with tempfile.TemporaryDirectory() as tmpdirname: + # let's make sure this super large numpy file is not downloaded! + _ = DiffusionPipeline.from_pretrained("hf-internal-testing/unet-pipeline-dummy", cache_dir=tmpdirname) + + # inspect all downloaded files to make sure that everything is included + def test_from_pretrained_save_pretrained(self): # 1. Load models model = UNet2DModel( From c891b82898b6ad3ba7984a292fd7b73b303a791c Mon Sep 17 00:00:00 2001 From: patrickvonplaten Date: Wed, 14 Sep 2022 19:47:10 +0200 Subject: [PATCH 3/5] finish test --- src/diffusers/pipeline_utils.py | 2 +- tests/test_pipelines.py | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index b13f41ed7190..2fa7b69d2106 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -32,7 +32,7 @@ from .configuration_utils import ConfigMixin from .modeling_utils import WEIGHTS_NAME from .schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME -from .utils import DIFFUSERS_CACHE, BaseOutput, logging, CONFIG_NAME +from .utils import CONFIG_NAME, DIFFUSERS_CACHE, BaseOutput, logging INDEX_FILE = "diffusion_pytorch_model.bin" diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index c7cde0432f36..018efef8a109 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -14,6 +14,7 @@ # limitations under the License. import gc +import os import random import tempfile import unittest @@ -46,8 +47,11 @@ UNet2DModel, VQModel, ) +from diffusers.modeling_utils import WEIGHTS_NAME from diffusers.pipeline_utils import DiffusionPipeline +from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.testing_utils import floats_tensor, slow, torch_device +from diffusers.utils import CONFIG_NAME from PIL import Image from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -709,11 +713,25 @@ def tearDown(self): torch.cuda.empty_cache() def test_smart_download(self): + model_id = "hf-internal-testing/unet-pipeline-dummy" with tempfile.TemporaryDirectory() as tmpdirname: - # let's make sure this super large numpy file is not downloaded! - _ = DiffusionPipeline.from_pretrained("hf-internal-testing/unet-pipeline-dummy", cache_dir=tmpdirname) + _ = DiffusionPipeline.from_pretrained(model_id, cache_dir=tmpdirname, force_download=True) + local_repo_name = "--".join(["models"] + model_id.split("/")) + snapshot_dir = os.path.join(tmpdirname, local_repo_name, "snapshots") + snapshot_dir = os.path.join(snapshot_dir, os.listdir(snapshot_dir)[0]) # inspect all downloaded files to make sure that everything is included + assert os.path.isfile(os.path.join(snapshot_dir, DiffusionPipeline.config_name)) + assert os.path.isfile(os.path.join(snapshot_dir, CONFIG_NAME)) + assert os.path.isfile(os.path.join(snapshot_dir, SCHEDULER_CONFIG_NAME)) + assert os.path.isfile(os.path.join(snapshot_dir, WEIGHTS_NAME)) + assert os.path.isfile(os.path.join(snapshot_dir, "scheduler", SCHEDULER_CONFIG_NAME)) + assert os.path.isfile(os.path.join(snapshot_dir, "unet", WEIGHTS_NAME)) + assert os.path.isfile(os.path.join(snapshot_dir, "unet", WEIGHTS_NAME)) + # let's make sure the super large numpy file: + # https://huggingface.co/hf-internal-testing/unet-pipeline-dummy/blob/main/big_array.npy + # is not downloaded, but all the expected ones + assert not os.path.isfile(os.path.join(snapshot_dir, "big_array.npy")) def test_from_pretrained_save_pretrained(self): # 1. Load models From 91dbe7828e166307b46930e8db6a5f27e9ef46f5 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 15 Sep 2022 16:47:01 +0200 Subject: [PATCH 4/5] update --- src/diffusers/onnx_utils.py | 2 -- src/diffusers/pipeline_utils.py | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/diffusers/onnx_utils.py b/src/diffusers/onnx_utils.py index e840565dd5c1..3c2a0b482922 100644 --- a/src/diffusers/onnx_utils.py +++ b/src/diffusers/onnx_utils.py @@ -38,8 +38,6 @@ class OnnxRuntimeModel: - base_model_prefix = "onnx_model" - def __init__(self, model=None, **kwargs): logger.info("`diffusers.OnnxRuntimeModel` is experimental and might change in the future.") self.model = model diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index b13f41ed7190..bc888131de53 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -31,8 +31,9 @@ from .configuration_utils import ConfigMixin from .modeling_utils import WEIGHTS_NAME +from .onnx_utils import ONNX_WEIGHTS_NAME from .schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME -from .utils import DIFFUSERS_CACHE, BaseOutput, logging, CONFIG_NAME +from .utils import CONFIG_NAME, DIFFUSERS_CACHE, BaseOutput, logging INDEX_FILE = "diffusion_pytorch_model.bin" @@ -299,7 +300,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P # make sure we only download sub-folders and `diffusers` filenames folder_names = [k for k in config_dict.keys() if not k.startswith("_")] allow_patterns = [os.path.join(k, "*") for k in folder_names] - allow_patterns += [WEIGHTS_NAME, SCHEDULER_CONFIG_NAME, CONFIG_NAME, cls.config_name] + allow_patterns += [WEIGHTS_NAME, SCHEDULER_CONFIG_NAME, CONFIG_NAME, ONNX_WEIGHTS_NAME, cls.config_name] # download all allow_patterns cached_folder = snapshot_download( From ac558ae278c6be95e3b3775195b7c13c6ef4c78d Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Fri, 16 Sep 2022 19:23:53 +0200 Subject: [PATCH 5/5] make style --- tests/test_pipelines.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 2d330a2e5531..102a55a93e4b 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -51,7 +51,6 @@ from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.testing_utils import floats_tensor, load_image, slow, torch_device from diffusers.utils import CONFIG_NAME - from PIL import Image from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer