37 changes: 37 additions & 0 deletions ldm/models/autoencoder.py
@@ -441,3 +441,40 @@ def quantize(self, x, *args, **kwargs):

    def forward(self, x, *args, **kwargs):
        return x


if __name__ == "__main__":
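    # Round-trip reconstruction check: encode a sample image with the
    # first-stage KL autoencoder, decode it, and compare the result to
    # the original file via PSNR / SSIM.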
    import numpy as np
    from omegaconf import OmegaConf
    from scripts.img2img import load_img, load_model_from_config
    from ldm.models.diffusion.ddim import DDIMSampler
    from PIL import Image
    from einops import rearrange
    from ldm.modules.metrics import calc_psnr, calc_ssim

    in_img = r"assets/25.jpg"
    out_img = r"assets/25-decoded.jpg"
    config = OmegaConf.load("models/first_stage_models/kl-f4/config.yaml")
    model = load_model_from_config(config=config,
                                   ckpt="models/first_stage_models/kl-f4/model.ckpt")  # .to("cpu")
    # sampler = DDIMSampler(model)
    init_image = load_img(in_img).to("cuda")
    # init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image))

    # sampler.make_schedule(ddim_num_steps=50, ddim_eta=0.0, verbose=False)

    # (rick.jpeg).size == (900, 900) needs roughly 21 GB of GPU memory
    t_enc = int(0.75 * 50)  # strength * ddim_steps; only needed by the commented-out sampler path
    with torch.no_grad():
        with torch.autocast("cuda"):
            # Encode to the latent space; take the mode of the posterior
            # instead of sampling so the reconstruction is deterministic.
            encoded = model.encode(init_image)
            if isinstance(encoded, DiagonalGaussianDistribution):
                encoded = encoded.mode()
            decoded = model.decode(encoded)
            # Map the output from [-1, 1] back to [0, 1], drop the batch
            # dimension, and save as an 8-bit image.
            decoded = torch.clamp((decoded + 1.0) / 2.0, min=0.0, max=1.0).squeeze(0)
            x_sample = 255. * rearrange(decoded.cpu().detach().numpy(), 'c h w -> h w c')
            Image.fromarray(x_sample.astype(np.uint8)).save(out_img)

    with torch.no_grad():
        torch.cuda.empty_cache()  # release cached GPU memory before scoring the images
        print("PSNR:", calc_psnr(in_img, out_img))
        print("SSIM:", calc_ssim(in_img, out_img))
68 changes: 68 additions & 0 deletions ldm/modules/metrics.py
@@ -0,0 +1,68 @@
# code references: https://zhuanlan.zhihu.com/p/309892873

from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
from PIL import Image
import numpy as np
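
# File-based image-quality helpers: each function takes two image paths,
# resizes the second image to the size of the first, and returns the score.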

def calc_ssim(img1_path, img2_path):
    '''
    Parameters
    ----------
    img1_path : str
        Path to the first image.
    img2_path : str
        Path to the second image.

    Returns
    -------
    ssim_score : numpy.float64
        Structural similarity index (SSIM).

    References
    ----------
    https://scikit-image.org/docs/dev/auto_examples/transform/plot_ssim.html

    '''
    img1 = Image.open(img1_path).convert('L')
    img2 = Image.open(img2_path).convert('L')
    img2 = img2.resize(img1.size)
    img1, img2 = np.array(img1), np.array(img2)
    # After conversion to grayscale the pixel values lie in 0-255, so data_range is 255.
    # If the images were converted to floats in the 0-1 range, data_range should be 1.
    ssim_score = ssim(img1, img2, data_range=255)
    return ssim_score


def calc_psnr(img1_path, img2_path):
    '''
    Parameters
    ----------
    img1_path : str
        Path to the first image.
    img2_path : str
        Path to the second image.

    Returns
    -------
    psnr_score : numpy.float64
        Peak signal-to-noise ratio (PSNR).

    References
    ----------
    https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio

    '''
    img1 = Image.open(img1_path)
    img2 = Image.open(img2_path)
    img2 = img2.resize(img1.size)
    img1, img2 = np.array(img1), np.array(img2)
    # The first image is treated as the ground truth and the second as the test image.
    # Pixel values lie in 0-255, so data_range is 255; for floats in the 0-1 range it should be 1.
    psnr_score = psnr(img1, img2, data_range=255)
    return psnr_score

if __name__ == "__main__":
    img1 = "assets/rick.jpeg"
    img2 = "assets/rick-decoded.jpeg"
    print("PSNR:", calc_psnr(img1, img2))
    print("SSIM:", calc_ssim(img1, img2))