From 3fe37950431e43f4fd838efc6c08eab0e0d7d39d Mon Sep 17 00:00:00 2001 From: Walter Hugo Lopez Pinaya Date: Thu, 5 Jan 2023 20:31:26 +0000 Subject: [PATCH] Add v_prediction and update docstrings Signed-off-by: Walter Hugo Lopez Pinaya --- generative/networks/schedulers/ddim.py | 5 +++-- generative/networks/schedulers/ddpm.py | 4 ++++ generative/networks/schedulers/pndm.py | 12 ++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/generative/networks/schedulers/ddim.py b/generative/networks/schedulers/ddim.py index 364cc592..e55b3941 100644 --- a/generative/networks/schedulers/ddim.py +++ b/generative/networks/schedulers/ddim.py @@ -55,8 +55,9 @@ class DDIMScheduler(nn.Module): steps_offset: an offset added to the inference steps. You can use a combination of `steps_offset=1` and `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in stable diffusion. - prediction_type: prediction type of the scheduler function, one of `epsilon` (predicting the noise of the - diffusion process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4 + prediction_type: {``"epsilon"``, ``"sample"``, ``"v_prediction"``} + prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion + process), `sample` (directly predicting the noisy sample) or `v_prediction` (see section 2.4 https://imagen.research.google/video/paper.pdf) """ diff --git a/generative/networks/schedulers/ddpm.py b/generative/networks/schedulers/ddpm.py index 9f5ca107..2b3d5940 100644 --- a/generative/networks/schedulers/ddpm.py +++ b/generative/networks/schedulers/ddpm.py @@ -51,6 +51,10 @@ class DDPMScheduler(nn.Module): variance_type: {``"fixed_small"``, ``"fixed_large"``, ``"learned"``, ``"learned_range"``} options to clip the variance used when adding noise to the denoised sample. clip_sample: option to clip predicted sample between -1 and 1 for numerical stability. 
+ prediction_type: {``"epsilon"``, ``"sample"``, ``"v_prediction"``} + prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion + process), `sample` (directly predicting the noisy sample) or `v_prediction` (see section 2.4 + https://imagen.research.google/video/paper.pdf) """ def __init__( diff --git a/generative/networks/schedulers/pndm.py b/generative/networks/schedulers/pndm.py index 69218f6b..852a4632 100644 --- a/generative/networks/schedulers/pndm.py +++ b/generative/networks/schedulers/pndm.py @@ -55,6 +55,10 @@ class PNDMScheduler(nn.Module): each diffusion step uses the value of alphas product at that step and at the previous one. For the final step there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`, otherwise it uses the value of alpha at step 0. + prediction_type: {``"epsilon"``, ``"v_prediction"``} + prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion + process) or `v_prediction` (see section 2.4 + https://imagen.research.google/video/paper.pdf) steps_offset: an offset added to the inference steps. 
You can use a combination of `offset=1` and `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in @@ -69,6 +73,7 @@ def __init__( beta_schedule: str = "linear", skip_prk_steps: bool = False, set_alpha_to_one: bool = False, + prediction_type: str = "epsilon", steps_offset: int = 0, ) -> None: super().__init__() @@ -83,6 +88,10 @@ def __init__( else: raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + if prediction_type.lower() not in ["epsilon", "v_prediction"]: + raise ValueError(f"prediction_type given as {prediction_type} must be one of `epsilon` or `v_prediction`") + + self.prediction_type = prediction_type self.num_train_timesteps = num_train_timesteps self.alphas = 1.0 - self.betas self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) @@ -294,6 +303,9 @@ def _get_prev_sample(self, sample: torch.Tensor, timestep: int, prev_timestep: i beta_prod_t = 1 - alpha_prod_t beta_prod_t_prev = 1 - alpha_prod_t_prev + if self.prediction_type == "v_prediction": + model_output = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample + # corresponds to (α_(t−δ) - α_t) divided by # denominator of x_t in formula (9) and plus 1 # Note: (α_(t−δ) - α_t) / (sqrt(α_t) * (sqrt(α_(t−δ)) + sqr(α_t))) =