37 changes: 37 additions & 0 deletions ldm/models/autoencoder.py
@@ -441,3 +441,40 @@ def quantize(self, x, *args, **kwargs):

    def forward(self, x, *args, **kwargs):
        return x


if __name__ == "__main__":
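    # Round-trip reconstruction check: encode a sample image with the
    # first-stage KL autoencoder, decode it, and compare the result to
    # the original file via PSNR / SSIM.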
    import numpy as np
    from omegaconf import OmegaConf
    from scripts.img2img import load_img, load_model_from_config
    from ldm.models.diffusion.ddim import DDIMSampler
    from PIL import Image
    from einops import rearrange
    from ldm.modules.metrics import calc_psnr, calc_ssim

    in_img = r"assets/25.jpg"
    out_img = r"assets/25-decoded.jpg"
    config = OmegaConf.load("models/first_stage_models/kl-f4/config.yaml")
    model = load_model_from_config(config=config,
                                   ckpt="models/first_stage_models/kl-f4/model.ckpt")  # .to("cpu")
    # sampler = DDIMSampler(model)
    init_image = load_img(in_img).to("cuda")
    # init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image))

    # sampler.make_schedule(ddim_num_steps=50, ddim_eta=0.0, verbose=False)

    # (rick.jpeg).size == (900, 900) needs roughly 21 GB of GPU memory
    t_enc = int(0.75 * 50)  # strength * ddim_steps; only needed by the commented-out sampler path
    with torch.no_grad():
        with torch.autocast("cuda"):
            # Encode to the latent space; take the mode of the posterior
            # instead of sampling so the reconstruction is deterministic.
            encoded = model.encode(init_image)
            if isinstance(encoded, DiagonalGaussianDistribution):
                encoded = encoded.mode()
            decoded = model.decode(encoded)
            # Map the output from [-1, 1] back to [0, 1], drop the batch
            # dimension, and save as an 8-bit image.
            decoded = torch.clamp((decoded + 1.0) / 2.0, min=0.0, max=1.0).squeeze(0)
            x_sample = 255. * rearrange(decoded.cpu().detach().numpy(), 'c h w -> h w c')
            Image.fromarray(x_sample.astype(np.uint8)).save(out_img)

    with torch.no_grad():
        torch.cuda.empty_cache()  # release cached GPU memory before scoring the images
        print("PSNR:", calc_psnr(in_img, out_img))
        print("SSIM:", calc_ssim(in_img, out_img))
68 changes: 68 additions & 0 deletions ldm/modules/metrics.py
@@ -0,0 +1,68 @@
# code references: https://zhuanlan.zhihu.com/p/309892873

from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
from PIL import Image
import numpy as np
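
# File-based image-quality helpers: each function takes two image paths,
# resizes the second image to the size of the first, and returns the score.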

def calc_ssim(img1_path, img2_path):
    '''
    Parameters
    ----------
    img1_path : str
        Path to the first image.
    img2_path : str
        Path to the second image.

    Returns
    -------
    ssim_score : numpy.float64
        Structural similarity index (SSIM).

    References
    ----------
    https://scikit-image.org/docs/dev/auto_examples/transform/plot_ssim.html

    '''
    img1 = Image.open(img1_path).convert('L')
    img2 = Image.open(img2_path).convert('L')
    img2 = img2.resize(img1.size)
    img1, img2 = np.array(img1), np.array(img2)
    # After conversion to grayscale the pixel values lie in 0-255, so data_range is 255.
    # If the images were converted to floats in the 0-1 range, data_range should be 1.
    ssim_score = ssim(img1, img2, data_range=255)
    return ssim_score


def calc_psnr(img1_path, img2_path):
    '''
    Parameters
    ----------
    img1_path : str
        Path to the first image.
    img2_path : str
        Path to the second image.

    Returns
    -------
    psnr_score : numpy.float64
        Peak signal-to-noise ratio (PSNR).

    References
    ----------
    https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio

    '''
    img1 = Image.open(img1_path)
    img2 = Image.open(img2_path)
    img2 = img2.resize(img1.size)
    img1, img2 = np.array(img1), np.array(img2)
    # The first image is treated as the ground truth and the second as the test image.
    # Pixel values lie in 0-255, so data_range is 255; for floats in the 0-1 range it should be 1.
    psnr_score = psnr(img1, img2, data_range=255)
    return psnr_score

if __name__ == "__main__":
    img1 = "assets/rick.jpeg"
    img2 = "assets/rick-decoded.jpeg"
    print("PSNR:", calc_psnr(img1, img2))
    print("SSIM:", calc_ssim(img1, img2))