From efb8f323fc5f10378589af82ac49daa8ae4e1b12 Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Thu, 8 Sep 2022 14:43:04 +0200
Subject: [PATCH 1/2] up

---
 _                              | 53 ++++++++++++++++++++++++++++++++++
 docs/source/api/outputs.mdx    | 49 ++++++++++++++++++++++++-------
 src/diffusers/utils/outputs.py | 38 +++---------------------
 3 files changed, 95 insertions(+), 45 deletions(-)
 create mode 100644 _

diff --git a/_ b/_
new file mode 100644
index 000000000000..cfe4b4c0d1c7
--- /dev/null
+++ b/_
@@ -0,0 +1,53 @@
+
+
+# BaseOutputs
+
+All models have outputs that are instances of subclasses of [`~utils.BaseOutput`]. Those are
+data structures containing all the information returned by the model, but that can also be used as tuples or
+dictionaries.
+
+Let's see how this looks in an example:
+
+```python
+from diffusers import DDIMPipeline
+
+pipeline = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")
+outputs = pipeline()
+```
+
+The `outputs` object is a [`~pipeline_utils.ImagePipelineOutput`], as we can see in the
+documentation of that class below, it means it has an image attribute.
+
+You can access each attribute as you would usually do, and if that attribute has not been returned by the model, you will get `None`.
+
+When considering our `outputs` object as tuple, it only considers the attributes that don't have `None` values.
+Here for instance, we could retrieve images via indexing:
+
+```python
+outputs[:1]
+```
+
+which will return the tuple `(outputs.images)` for instance.
+
+## BaseOutput
+
+[[autodoc]] utils.BaseOutput
+    - to_tuple
+
+
+tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
+
+inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
+labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
+outputs = model(**inputs, labels=labels)
diff --git a/docs/source/api/outputs.mdx b/docs/source/api/outputs.mdx
index 5c435dc8e1f1..010761fb2e4b 100644
--- a/docs/source/api/outputs.mdx
+++ b/docs/source/api/outputs.mdx
@@ -10,19 +10,46 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o
 specific language governing permissions and limitations under the License.
 -->
 
-# Models
+# BaseOutputs
 
-Diffusers contains pretrained models for popular algorithms and modules for creating the next set of diffusion models.
-The primary function of these models is to denoise an input sample, by modeling the distribution $p_\theta(\mathbf{x}_{t-1}|\mathbf{x}_t)$.
-The models are built on the base class ['ModelMixin'] that is a `torch.nn.module` with basic functionality for saving and loading models both locally and from the HuggingFace hub.
+All models have outputs that are instances of subclasses of [`~utils.BaseOutput`]. Those are
+data structures containing all the information returned by the model, but they can also be used as tuples or
+dictionaries.
 
-## API
+Let's see how this looks in an example:
 
-Models should provide the `def forward` function and initialization of the model.
-All saving, loading, and utilities should be in the base ['ModelMixin'] class.
+```python
+from diffusers import DDIMPipeline
 
-## Examples
+pipeline = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")
+outputs = pipeline()
+```
 
-- The ['UNetModel'] was proposed in [TODO](https://arxiv.org/) and has been used in paper1, paper2, paper3.
-- Extensions of the ['UNetModel'] include the ['UNetGlideModel'] that uses attention and timestep embeddings for the [GLIDE](https://arxiv.org/abs/2112.10741) paper, the ['UNetGradTTS'] model from this [paper](https://arxiv.org/abs/2105.06337) for text-to-speech, ['UNetLDMModel'] for latent-diffusion models in this [paper](https://arxiv.org/abs/2112.10752), and the ['TemporalUNet'] used for time-series prediciton in this reinforcement learning [paper](https://arxiv.org/abs/2205.09991).
-- TODO: mention VAE / SDE score estimation
\ No newline at end of file
+The `outputs` object is an [`~pipeline_utils.ImagePipelineOutput`]. As we can see in the
+documentation of that class below, this means it has an `images` attribute.
+
+You can access each attribute as you usually would, and if an attribute has not been returned by the model, you will get `None`:
+
+```python
+outputs.images
+```
+
+or via keyword lookup:
+
+```python
+outputs["images"]
+```
+
+When considering our `outputs` object as a tuple, only the attributes that don't have `None` values are taken into account.
+Here, for instance, we could retrieve the images via indexing:
+
+```python
+outputs[:1]
+```
+
+which will return the tuple `(outputs.images,)`.
+
+## BaseOutput
+
+[[autodoc]] utils.BaseOutput
+    - to_tuple
diff --git a/src/diffusers/utils/outputs.py b/src/diffusers/utils/outputs.py
index d8e695db59b0..b02f62d02d03 100644
--- a/src/diffusers/utils/outputs.py
+++ b/src/diffusers/utils/outputs.py
@@ -59,40 +59,10 @@ def __post_init__(self):
         if not len(class_fields):
             raise ValueError(f"{self.__class__.__name__} has no fields.")
 
-        first_field = getattr(self, class_fields[0].name)
-        other_fields_are_none = all(getattr(self, field.name) is None for field in class_fields[1:])
-
-        if other_fields_are_none and not is_tensor(first_field):
-            if isinstance(first_field, dict):
-                iterator = first_field.items()
-                first_field_iterator = True
-            else:
-                try:
-                    iterator = iter(first_field)
-                    first_field_iterator = True
-                except TypeError:
-                    first_field_iterator = False
-
-            # if we provided an iterator as first field and the iterator is a (key, value) iterator
-            # set the associated fields
-            if first_field_iterator:
-                for element in iterator:
-                    if (
-                        not isinstance(element, (list, tuple))
-                        or not len(element) == 2
-                        or not isinstance(element[0], str)
-                    ):
-                        break
-                    setattr(self, element[0], element[1])
-                    if element[1] is not None:
-                        self[element[0]] = element[1]
-            elif first_field is not None:
-                self[class_fields[0].name] = first_field
-        else:
-            for field in class_fields:
-                v = getattr(self, field.name)
-                if v is not None:
-                    self[field.name] = v
+        for field in class_fields:
+            v = getattr(self, field.name)
+            if v is not None:
+                self[field.name] = v
 
     def __delitem__(self, *args, **kwargs):
         raise Exception(f"You cannot use ``__delitem__`` on a {self.__class__.__name__} instance.")

From 56f7472b13e73ff33cb54d533f741a5414e8bde4 Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Thu, 8 Sep 2022 14:43:44 +0200
Subject: [PATCH 2/2] remove bogus file

---
 _ | 53 -----------------------------------------------------
 1 file changed, 53 deletions(-)
 delete mode 100644 _

diff --git a/_ b/_
deleted file mode 100644
index cfe4b4c0d1c7..000000000000
--- a/_
+++ /dev/null
@@ -1,53 +0,0 @@
-
-
-# BaseOutputs
-
-All models have outputs that are instances of subclasses of [`~utils.BaseOutput`]. Those are
-data structures containing all the information returned by the model, but that can also be used as tuples or
-dictionaries.
-
-Let's see how this looks in an example:
-
-```python
-from diffusers import DDIMPipeline
-
-pipeline = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")
-outputs = pipeline()
-```
-
-The `outputs` object is a [`~pipeline_utils.ImagePipelineOutput`], as we can see in the
-documentation of that class below, it means it has an image attribute.
-
-You can access each attribute as you would usually do, and if that attribute has not been returned by the model, you will get `None`.
-
-When considering our `outputs` object as tuple, it only considers the attributes that don't have `None` values.
-Here for instance, we could retrieve images via indexing:
-
-```python
-outputs[:1]
-```
-
-which will return the tuple `(outputs.images)` for instance.
-
-## BaseOutput
-
-[[autodoc]] utils.BaseOutput
-    - to_tuple
-
-
-tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
-
-inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
-labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
-outputs = model(**inputs, labels=labels)
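Note on the `outputs.py` change in PATCH 1/2: the simplified `__post_init__` no longer special-cases the first field, it simply mirrors every non-`None` dataclass field into the underlying dict. A minimal sketch of the resulting behavior, assuming `BaseOutput` is importable from `diffusers.utils.outputs` as in this tree; the `MyPipelineOutput` class below is hypothetical and only used for illustration:

```python
from dataclasses import dataclass
from typing import Optional, Tuple

import numpy as np

from diffusers.utils.outputs import BaseOutput


@dataclass
class MyPipelineOutput(BaseOutput):
    # Hypothetical output class: one required field, one optional field left as None.
    images: np.ndarray
    extra: Optional[Tuple[int, ...]] = None


out = MyPipelineOutput(images=np.zeros((1, 8, 8, 3)))

# Attribute, key, and tuple access all resolve to the same underlying object.
assert out.images is out["images"]
assert out.to_tuple()[0] is out.images

# A field left as None never becomes a dict entry or tuple element.
assert "extra" not in out
assert len(out.to_tuple()) == 1
```

In other words, attribute, key, and tuple views stay in sync, and fields that were not returned simply disappear from the dict and tuple views, which is the behavior the `outputs[:1]` example in the docs above relies on.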