宁德做网站公司,wordpress 商店插件,自己做网站用什么软件,杭州做网点卖服装在那个网站相关知识
CycleGAN
循环生成网络#xff0c;实现了在没有配对示例的情况下将图像从源域X转换到目标域Y的方法#xff0c;应用于域迁移#xff0c;也就是图像风格迁移。上章介绍了可以完成图像翻译任务的Pix2Pix#xff0c;但是Pix2Pix的数据必须是成对的。CycleGAN中只需…相关知识
CycleGAN
循环生成网络实现了在没有配对示例的情况下将图像从源域X转换到目标域Y的方法应用于域迁移也就是图像风格迁移。上章介绍了可以完成图像翻译任务的Pix2Pix但是Pix2Pix的数据必须是成对的。CycleGAN中只需要两种域的数据而不需要有严格的对应关系是无监督的图像迁移网络。
模型结构
Cycle由两个镜像对称的GAN网络组成。X和Y是两种不同的域而G是将X生成Y的生成器F是将Y生成X的生成器Dx和Dy依然是他们本身的判别器。最终模型生成相应的生成器是两种风格可以彼此迁移生成对应风格的图像。
另外一个重要的部分就是损失函数这里使用的是Cycle Consistency Loss循环一致损失。过程是首先将x输入将风格X转化为风格Y的生成器G生成伪y再将伪y输入将风格Y转化为风格X的生成器F生成伪x。最终将伪x和x一起计算出损失。
实验
数据集处理
本章使用的数据集图片来源为ImageNet包含17个数据包。本章中仅使用橘子苹果部分图像示例如下
生成器构建
这里使用ResNet结构大小128128的图片使用6个残差块连接大小256256的图片使用9个残差块连接. 残差块的块数由n_layers控制。
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore.common.initializer import Normalweight_init Normal(sigma0.02)# 构建卷积、归一化、激活函数结构
class ConvNormReLU(nn.Cell):def __init__(self, input_channel, out_planes, kernel_size4, stride2, alpha0.2, norm_modeinstance,pad_modeCONSTANT, use_reluTrue, paddingNone, transposeFalse):super(ConvNormReLU, self).__init__()norm nn.BatchNorm2d(out_planes)if norm_mode instance:norm nn.BatchNorm2d(out_planes, affineFalse)has_bias (norm_mode instance)if padding is None:padding (kernel_size - 1) // 2if pad_mode CONSTANT:if transpose:conv nn.Conv2dTranspose(input_channel, out_planes, kernel_size, stride, pad_modesame,has_biashas_bias, weight_initweight_init)else:conv nn.Conv2d(input_channel, out_planes, kernel_size, stride, pad_modepad,has_biashas_bias, paddingpadding, weight_initweight_init)layers [conv, norm]else:paddings ((0, 0), (0, 0), (padding, padding), (padding, padding))pad nn.Pad(paddingspaddings, modepad_mode)if transpose:conv nn.Conv2dTranspose(input_channel, out_planes, kernel_size, stride, pad_modepad,has_biashas_bias, weight_initweight_init)else:conv nn.Conv2d(input_channel, out_planes, kernel_size, stride, pad_modepad,has_biashas_bias, weight_initweight_init)layers [pad, conv, norm]if use_relu:relu nn.ReLU()if alpha 0:relu nn.LeakyReLU(alpha)layers.append(relu)self.features nn.SequentialCell(layers)def construct(self, x):output self.features(x)return output# 构建残差块
class ResidualBlock(nn.Cell):def __init__(self, dim, norm_modeinstance, dropoutFalse, pad_modeCONSTANT):super(ResidualBlock, self).__init__()self.conv1 ConvNormReLU(dim, dim, 3, 1, 0, norm_mode, pad_mode)self.conv2 ConvNormReLU(dim, dim, 3, 1, 0, norm_mode, pad_mode, use_reluFalse)self.dropout dropoutif dropout:self.dropout nn.Dropout(p0.5)def construct(self, x):out self.conv1(x)if self.dropout:out self.dropout(out)out self.conv2(out)return x out构建生成器
class ResNetGenerator(nn.Cell):def __init__(self, input_channel3, output_channel64, n_layers9, alpha0.2, norm_modeinstance, dropoutFalse,pad_modeCONSTANT):super(ResNetGenerator, self).__init__()self.conv_in ConvNormReLU(input_channel, output_channel, 7, 1, alpha, norm_mode, pad_modepad_mode)self.down_1 ConvNormReLU(output_channel, output_channel * 2, 3, 2, alpha, norm_mode)self.down_2 ConvNormReLU(output_channel * 2, output_channel * 4, 3, 2, alpha, norm_mode)layers [ResidualBlock(output_channel * 4, norm_mode, dropoutdropout, pad_modepad_mode)] * n_layersself.residuals nn.SequentialCell(layers)self.up_2 ConvNormReLU(output_channel * 4, output_channel * 2, 3, 2, alpha, norm_mode, transposeTrue)self.up_1 ConvNormReLU(output_channel * 2, output_channel, 3, 2, alpha, norm_mode, transposeTrue)if pad_mode CONSTANT:self.conv_out nn.Conv2d(output_channel, 3, kernel_size7, stride1, pad_modepad,padding3, weight_initweight_init)else:pad nn.Pad(paddings((0, 0), (0, 0), (3, 3), (3, 3)), modepad_mode)conv nn.Conv2d(output_channel, 3, kernel_size7, stride1, pad_modepad, weight_initweight_init)self.conv_out nn.SequentialCell([pad, conv])def construct(self, x):x self.conv_in(x)x self.down_1(x)x self.down_2(x)x self.residuals(x)x self.up_2(x)x self.up_1(x)output self.conv_out(x)return ops.tanh(output)# 实例化生成器
net_rg_a ResNetGenerator()
net_rg_a.update_parameters_name(net_rg_a.)net_rg_b ResNetGenerator()
net_rg_b.update_parameters_name(net_rg_b.)构建判别器
这里使用PatchGAN和Pix2Pix实现方式一样最终使用Sigmoid激活函数得到最终概率。
# 定义判别器
class Discriminator(nn.Cell):def __init__(self, input_channel3, output_channel64, n_layers3, alpha0.2, norm_modeinstance):super(Discriminator, self).__init__()kernel_size 4layers [nn.Conv2d(input_channel, output_channel, kernel_size, 2, pad_modepad, padding1, weight_initweight_init),nn.LeakyReLU(alpha)]nf_mult output_channelfor i in range(1, n_layers):nf_mult_prev nf_multnf_mult min(2 ** i, 8) * output_channellayers.append(ConvNormReLU(nf_mult_prev, nf_mult, kernel_size, 2, alpha, norm_mode, padding1))nf_mult_prev nf_multnf_mult min(2 ** n_layers, 8) * output_channellayers.append(ConvNormReLU(nf_mult_prev, nf_mult, kernel_size, 1, alpha, norm_mode, padding1))layers.append(nn.Conv2d(nf_mult, 1, kernel_size, 1, pad_modepad, padding1, weight_initweight_init))self.features nn.SequentialCell(layers)def construct(self, x):output self.features(x)return output# 判别器初始化
net_d_a Discriminator()
net_d_a.update_parameters_name(net_d_a.)net_d_b Discriminator()
net_d_b.update_parameters_name(net_d_b.)优化器和损失函数
对于G和其判别器Dy目标损失函数定义为 L G A N ( G , D Y , X , Y ) E y − p d a t a ( y ) [ l o g D Y ( y ) ] E x − p d a t a ( x ) [ l o g ( 1 − D Y ( G ( x ) ) ) ] L_{GAN}(G,D_Y,X,Y)E_{y-p_{data}(y)}[logD_Y(y)]E_{x-p_{data}(x)}[log(1-D_Y(G(x)))] LGAN(G,DY,X,Y)Ey−pdata(y)[logDY(y)]Ex−pdata(x)[log(1−DY(G(x)))] 其中 G G G 试图生成看起来与 Y Y Y 中的图像相似的图像 G ( x ) G(x) G(x) 而 D Y D_{Y} DY 的目标是区分翻译样本 G ( x ) G(x) G(x) 和真实样本 y y y 生成器的目标是最小化这个损失函数以此来对抗判别器。即 $ min_{G} max_{D_{Y}}L_{GAN}(G,D_{Y} ,X,Y )$ 。
而对于两个生成器而言使用了此前介绍的循环一致性损失函数具体定义为 L c y c ( G , F ) E x − p d a t a ( x ) [ ∥ F ( G ( x ) ) − x ∥ 1 ] E y − p d a t a ( y ) [ ∥ G ( F ( y ) ) − y ∥ 1 ] L_{cyc}(G,F)E_{x-p_{data}(x)}[\Vert F(G(x))-x\Vert_{1}]E_{y-p_{data}(y)}[\Vert G(F(y))-y\Vert_{1}] Lcyc(G,F)Ex−pdata(x)[∥F(G(x))−x∥1]Ey−pdata(y)[∥G(F(y))−y∥1] 循环一致损失能够保证重建图像 F ( G ( x ) ) F(G(x)) F(G(x)) 与输入图像 x x x 紧密匹配。
构建生成器判别器优化器
optimizer_rg_a nn.Adam(net_rg_a.trainable_params(), learning_rate0.0002, beta10.5)
optimizer_rg_b nn.Adam(net_rg_b.trainable_params(), learning_rate0.0002, beta10.5)optimizer_d_a nn.Adam(net_d_a.trainable_params(), learning_rate0.0002, beta10.5)
optimizer_d_b nn.Adam(net_d_b.trainable_params(), learning_rate0.0002, beta10.5)# GAN网络损失函数这里最后一层不使用sigmoid函数
loss_fn nn.MSELoss(reductionmean)
l1_loss nn.L1Loss(mean)def gan_loss(predict, target):target ops.ones_like(predict) * targetloss loss_fn(predict, target)return loss前向计算
为了减少模型震荡这里的策略是使用生成器生成图像的历史数据而不是最新生成的图像数据来更新判别器。创建了image_poll函数保留了图像缓存区存储前50个图像。
# 前向计算def generator(img_a, img_b):fake_a net_rg_b(img_b)fake_b net_rg_a(img_a)rec_a net_rg_b(fake_b)rec_b net_rg_a(fake_a)identity_a net_rg_b(img_a)identity_b net_rg_a(img_b)return fake_a, fake_b, rec_a, rec_b, identity_a, identity_blambda_a 10.0
lambda_b 10.0
lambda_idt 0.5def generator_forward(img_a, img_b):true Tensor(True, dtype.bool_)fake_a, fake_b, rec_a, rec_b, identity_a, identity_b generator(img_a, img_b)loss_g_a gan_loss(net_d_b(fake_b), true)loss_g_b gan_loss(net_d_a(fake_a), true)loss_c_a l1_loss(rec_a, img_a) * lambda_aloss_c_b l1_loss(rec_b, img_b) * lambda_bloss_idt_a l1_loss(identity_a, img_a) * lambda_a * lambda_idtloss_idt_b l1_loss(identity_b, img_b) * lambda_b * lambda_idtloss_g loss_g_a loss_g_b loss_c_a loss_c_b loss_idt_a loss_idt_breturn fake_a, fake_b, loss_g, loss_g_a, loss_g_b, loss_c_a, loss_c_b, loss_idt_a, loss_idt_bdef generator_forward_grad(img_a, img_b):_, _, loss_g, _, _, _, _, _, _ generator_forward(img_a, img_b)return loss_gdef discriminator_forward(img_a, img_b, fake_a, fake_b):false Tensor(False, dtype.bool_)true Tensor(True, dtype.bool_)d_fake_a net_d_a(fake_a)d_img_a net_d_a(img_a)d_fake_b net_d_b(fake_b)d_img_b net_d_b(img_b)loss_d_a gan_loss(d_fake_a, false) gan_loss(d_img_a, true)loss_d_b gan_loss(d_fake_b, false) gan_loss(d_img_b, true)loss_d (loss_d_a loss_d_b) * 0.5return loss_ddef discriminator_forward_a(img_a, fake_a):false Tensor(False, dtype.bool_)true Tensor(True, dtype.bool_)d_fake_a net_d_a(fake_a)d_img_a net_d_a(img_a)loss_d_a gan_loss(d_fake_a, false) gan_loss(d_img_a, true)return loss_d_adef discriminator_forward_b(img_b, fake_b):false Tensor(False, dtype.bool_)true Tensor(True, dtype.bool_)d_fake_b net_d_b(fake_b)d_img_b net_d_b(img_b)loss_d_b gan_loss(d_fake_b, false) gan_loss(d_img_b, true)return loss_d_b# 保留了一个图像缓冲区用来存储之前创建的50个图像
pool_size 50
def image_pool(images):num_imgs 0image1 []if isinstance(images, Tensor):images images.asnumpy()return_images []for image in images:if num_imgs pool_size:num_imgs num_imgs 1image1.append(image)return_images.append(image)else:if random.uniform(0, 1) 0.5:random_id random.randint(0, pool_size - 1)tmp image1[random_id].copy()image1[random_id] imagereturn_images.append(tmp)else:return_images.append(image)output Tensor(return_images, ms.float32)if output.ndim ! 4:raise ValueError(img should be 4d, but get shape {}.format(output.shape))return output计算梯度及反向传播
from mindspore import value_and_grad# 实例化求梯度的方法
grad_g_a value_and_grad(generator_forward_grad, None, net_rg_a.trainable_params())
grad_g_b value_and_grad(generator_forward_grad, None, net_rg_b.trainable_params())grad_d_a value_and_grad(discriminator_forward_a, None, net_d_a.trainable_params())
grad_d_b value_and_grad(discriminator_forward_b, None, net_d_b.trainable_params())# 计算生成器的梯度反向传播更新参数
def train_step_g(img_a, img_b):net_d_a.set_grad(False)net_d_b.set_grad(False)fake_a, fake_b, lg, lga, lgb, lca, lcb, lia, lib generator_forward(img_a, img_b)_, grads_g_a grad_g_a(img_a, img_b)_, grads_g_b grad_g_b(img_a, img_b)optimizer_rg_a(grads_g_a)optimizer_rg_b(grads_g_b)return fake_a, fake_b, lg, lga, lgb, lca, lcb, lia, lib# 计算判别器的梯度反向传播更新参数
def train_step_d(img_a, img_b, fake_a, fake_b):net_d_a.set_grad(True)net_d_b.set_grad(True)loss_d_a, grads_d_a grad_d_a(img_a, fake_a)loss_d_b, grads_d_b grad_d_b(img_b, fake_b)loss_d (loss_d_a loss_d_b) * 0.5optimizer_d_a(grads_d_a)optimizer_d_b(grads_d_b)return loss_d模型训练
依然分成判别器的训练和生成器的训练。
训练判别器训练判别器的目的是最大程度地提高判别图像真伪的概率。按照论文的方法需要训练判别器来最小化 E y − p d a t a ( y ) [ ( D ( y ) − 1 ) 2 ] E_{y-p_{data}(y)}[(D(y)-1)^2] Ey−pdata(y)[(D(y)−1)2] 训练生成器如 CycleGAN 论文所述我们希望通过最小化 E x − p d a t a ( x ) [ ( D ( G ( x ) − 1 ) 2 ] E_{x-p_{data}(x)}[(D(G(x)-1)^2] Ex−pdata(x)[(D(G(x)−1)2] 来训练生成器以产生更好的虚假图像。
import os
import time
import random
import numpy as np
from PIL import Image
from mindspore import Tensor, save_checkpoint
from mindspore import dtype# 由于时间原因epochs设置为1可根据需求进行调整
epochs 1
save_step_num 80
save_checkpoint_epochs 1
save_ckpt_dir ./train_ckpt_outputs/print(Start training!)for epoch in range(epochs):g_loss []d_loss []start_time_e time.time()for step, data in enumerate(dataset.create_dict_iterator()):start_time_s time.time()img_a data[image_A]img_b data[image_B]res_g train_step_g(img_a, img_b)fake_a res_g[0]fake_b res_g[1]res_d train_step_d(img_a, img_b, image_pool(fake_a), image_pool(fake_b))loss_d float(res_d.asnumpy())step_time time.time() - start_time_sres []for item in res_g[2:]:res.append(float(item.asnumpy()))g_loss.append(res[0])d_loss.append(loss_d)if step % save_step_num 0:print(fEpoch:[{int(epoch 1):3d}/{int(epochs):3d}], fstep:[{int(step):4d}/{int(datasize):4d}], ftime:{step_time:3f}s,\nfloss_g:{res[0]:.2f}, loss_d:{loss_d:.2f}, floss_g_a: {res[1]:.2f}, loss_g_b: {res[2]:.2f}, floss_c_a: {res[3]:.2f}, loss_c_b: {res[4]:.2f}, floss_idt_a: {res[5]:.2f}, loss_idt_b: {res[6]:.2f})epoch_cost time.time() - start_time_eper_step_time epoch_cost / datasizemean_loss_d, mean_loss_g sum(d_loss) / datasize, sum(g_loss) / datasizeprint(fEpoch:[{int(epoch 1):3d}/{int(epochs):3d}], fepoch time:{epoch_cost:.2f}s, per step time:{per_step_time:.2f}, fmean_g_loss:{mean_loss_g:.2f}, mean_d_loss:{mean_loss_d :.2f})if epoch % save_checkpoint_epochs 0:os.makedirs(save_ckpt_dir, exist_okTrue)save_checkpoint(net_rg_a, os.path.join(save_ckpt_dir, fg_a_{epoch}.ckpt))save_checkpoint(net_rg_b, os.path.join(save_ckpt_dir, fg_b_{epoch}.ckpt))save_checkpoint(net_d_a, os.path.join(save_ckpt_dir, fd_a_{epoch}.ckpt))save_checkpoint(net_d_b, os.path.join(save_ckpt_dir, fd_b_{epoch}.ckpt))模型推理
加载模型参数完成对原图的风格迁移。
import os
from PIL import Image
import mindspore.dataset as ds
import mindspore.dataset.vision as vision
from mindspore import load_checkpoint, load_param_into_net# 加载权重文件
def load_ckpt(net, ckpt_dir):param_GA load_checkpoint(ckpt_dir)load_param_into_net(net, param_GA)g_a_ckpt ./CycleGAN_apple2orange/ckpt/g_a.ckpt
g_b_ckpt ./CycleGAN_apple2orange/ckpt/g_b.ckptload_ckpt(net_rg_a, g_a_ckpt)
load_ckpt(net_rg_b, g_b_ckpt)# 图片推理
fig plt.figure(figsize(11, 2.5), dpi100)
def eval_data(dir_path, net, a):def read_img():for dir in os.listdir(dir_path):path os.path.join(dir_path, dir)img Image.open(path).convert(RGB)yield img, dirdataset ds.GeneratorDataset(read_img, column_names[image, image_name])trans [vision.Resize((256, 256)), vision.Normalize(mean[0.5 * 255] * 3, std[0.5 * 255] * 3), vision.HWC2CHW()]dataset dataset.map(operationstrans, input_columns[image])dataset dataset.batch(1)for i, data in enumerate(dataset.create_dict_iterator()):img data[image]fake net(img)fake (fake[0] * 0.5 * 255 0.5 * 255).astype(np.uint8).transpose((1, 2, 0))img (img[0] * 0.5 * 255 0.5 * 255).astype(np.uint8).transpose((1, 2, 0))fig.add_subplot(2, 8, i1a)plt.axis(off)plt.imshow(img.asnumpy())fig.add_subplot(2, 8, i9a)plt.axis(off)plt.imshow(fake.asnumpy())eval_data(./CycleGAN_apple2orange/predict/apple, net_rg_a, 0)
eval_data(./CycleGAN_apple2orange/predict/orange, net_rg_b, 4)
plt.show()可以看到已经完成了橘子和苹果的风格迁移。
总结
本章依然实现了一个基于GAN的模型使用CycleGAN完成了风格迁移任务。CycleGAN使用镜像的结构利用循环一致性损失函数来计算原图片和生成的伪图之间的损失。
打卡凭证