From 8de864ddd28be21298ec97a04c03b6ab9649ef5a Mon Sep 17 00:00:00 2001
From: Satomi2333 <55435133+Satomi2333@users.noreply.github.com>
Date: Sat, 12 Nov 2022 14:08:55 +0800
Subject: [PATCH] Test the PSNR and SSIM of the image generated by the autoencoder

---
 ldm/models/autoencoder.py | 37 +++++++++++++++++++++
 ldm/modules/metrics.py    | 68 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100644 ldm/modules/metrics.py

diff --git a/ldm/models/autoencoder.py b/ldm/models/autoencoder.py
index 6a9c4f454..d965af422 100644
--- a/ldm/models/autoencoder.py
+++ b/ldm/models/autoencoder.py
@@ -441,3 +441,40 @@ def quantize(self, x, *args, **kwargs):
 
     def forward(self, x, *args, **kwargs):
         return x
+
+
+if __name__ == "__main__":
+    import numpy as np
+    from omegaconf import OmegaConf
+    from scripts.img2img import load_img, load_model_from_config
+    from ldm.models.diffusion.ddim import DDIMSampler
+    from PIL import Image
+    from einops import rearrange
+    from ldm.modules.metrics import calc_psnr, calc_ssim
+    in_img = r"assets/25.jpg"
+    out_img = r"assets/25-decoded.jpg"
+    config = OmegaConf.load(r"models\first_stage_models\kl-f4\config.yaml")
+    model = load_model_from_config(config=config,
+                                   ckpt=r"models\first_stage_models\kl-f4\model.ckpt")  # .to("cpu")
+    # sampler = DDIMSampler(model)
+    init_image = load_img(in_img).to("cuda")
+    # init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image))
+
+    # sampler.make_schedule(ddim_num_steps=50, ddim_eta=0.0, verbose=False)
+
+    # rick.jpeg (900x900) used 21 GB of memory
+    t_enc = int(0.75 * 50)
+    with torch.no_grad():
+        with torch.autocast("cuda"):
+            encoded = model.encode(init_image)
+            if isinstance(encoded, DiagonalGaussianDistribution):
+                encoded = encoded.mode()
+            decoded = model.decode(encoded)
+            decoded = torch.clamp((decoded + 1.0) / 2.0, min=0.0, max=1.0).squeeze(0)
+            x_sample = 255. * rearrange(decoded.cpu().detach().numpy(), 'c h w -> h w c')
+            Image.fromarray(x_sample.astype(np.uint8)).save(out_img)
+
+    with torch.no_grad():
+        torch.cuda.empty_cache()
+    print("PSNR:", calc_psnr(in_img, out_img))
+    print("SSIM:", calc_ssim(in_img, out_img))
\ No newline at end of file
diff --git a/ldm/modules/metrics.py b/ldm/modules/metrics.py
new file mode 100644
index 000000000..5ed9bcdd5
--- /dev/null
+++ b/ldm/modules/metrics.py
@@ -0,0 +1,68 @@
+# code reference: https://zhuanlan.zhihu.com/p/309892873
+
+from skimage.metrics import structural_similarity as ssim
+from skimage.metrics import peak_signal_noise_ratio as psnr
+from PIL import Image
+import numpy as np
+
+def calc_ssim(img1_path, img2_path):
+    '''
+    Parameters
+    ----------
+    img1_path : str
+        Path to the first image.
+    img2_path : str
+        Path to the second image.
+
+    Returns
+    -------
+    ssim_score : numpy.float64
+        Structural similarity index (SSIM).
+
+    References
+    ----------
+    https://scikit-image.org/docs/dev/auto_examples/transform/plot_ssim.html
+
+    '''
+    img1 = Image.open(img1_path).convert('L')
+    img2 = Image.open(img2_path).convert('L')
+    img2 = img2.resize(img1.size)
+    img1, img2 = np.array(img1), np.array(img2)
+    # After converting to grayscale the pixel values are 0-255, so data_range is 255; for float images in 0-1, data_range should be 1
+    ssim_score = ssim(img1, img2, data_range=255)
+    return ssim_score
+
+
+def calc_psnr(img1_path, img2_path):
+    '''
+    Parameters
+    ----------
+    img1_path : str
+        Path to the first image.
+    img2_path : str
+        Path to the second image.
+
+    Returns
+    -------
+    psnr_score : numpy.float64
+        Peak signal-to-noise ratio (PSNR).
+
+    References
+    ----------
+    https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
+
+    '''
+    img1 = Image.open(img1_path)
+    img2 = Image.open(img2_path)
+    img2 = img2.resize(img1.size)
+    img1, img2 = np.array(img1), np.array(img2)
+    # The first image is the reference image, the second is the test image
+    # Pixel values are 0-255, so data_range is 255; for float images in 0-1, data_range should be 1
+    psnr_score = psnr(img1, img2, data_range=255)
+    return psnr_score
+
+if __name__ == "__main__":
+    img1 = "assets/rick.jpeg"
+    img2 = "assets/rick-decoded.jpeg"
+    print("PSNR:", calc_psnr(img1, img2))
+    print("SSIM:", calc_ssim(img1, img2))
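Note (not part of the patch): the test above writes the decoded image to a JPEG and then re-reads both files from disk, so the reported PSNR/SSIM also include JPEG compression error. Below is a minimal sketch of computing the same scikit-image metrics directly on the in-memory tensors, assuming init_image and decoded as defined in the __main__ block above (init_image in [-1, 1] with shape (1, C, H, W); decoded already mapped to [0, 1] and squeezed to (C, H, W)). The channel_axis argument requires scikit-image >= 0.19; older releases use multichannel=True instead.

    from skimage.metrics import peak_signal_noise_ratio as psnr
    from skimage.metrics import structural_similarity as ssim

    # Map the reference image from [-1, 1] to [0, 1], then convert both tensors
    # to HWC float32 numpy arrays so the skimage functions can consume them.
    ref = ((init_image.squeeze(0) + 1.0) / 2.0).clamp(0.0, 1.0)
    ref = ref.detach().float().cpu().numpy().transpose(1, 2, 0)
    test = decoded.detach().float().cpu().numpy().transpose(1, 2, 0)

    # data_range=1.0 because both arrays are floats in [0, 1].
    print("PSNR (in-memory):", psnr(ref, test, data_range=1.0))
    print("SSIM (in-memory):", ssim(ref, test, data_range=1.0, channel_axis=-1))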