生成模型训练
在该模型中,与文末列出的一些失败结构不同的是,这里使用了动量的特性:不仅参考当前这一次训练得到的参数,也参考上一次训练的参数,综合起来考虑。其中BN层中的momentum参数就相当于之前提到的动量机制。
# 0.8表示新一轮产生的数据与前一轮产生的数据之间的使用比例,也就是动量操作(注意:BatchNorm2d的第二个位置参数是eps,动量必须用关键字momentum显式传入) nn.BatchNorm2d(64, momentum=0.8),
在前面的文章中有所提及。详情见【5】过拟合处理的一些技巧中的第4部分。
"""Train a small DCGAN on MNIST and periodically dump generated samples."""
import os

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader

# Hyper-parameters.
batch_size = 64
learning_rate = 0.0002
epochsize = 60
latent_dim = 100  # length of the noise vector fed to the generator
sample_dir = "images_3"  # directory that receives the generated image grids
data_root = 'E:/学习/机器学习/数据集/MNIST'  # local MNIST copy (download=False)
# Offset added to the epoch number in sample file names.
# NOTE(review): presumably left over from resuming an earlier 14-epoch run
# (the checkpoint loading is currently disabled) -- confirm before changing.
sample_index_offset = 14


class Generator(nn.Module):
    """Map noise vectors of shape (N, 100) to images of shape (N, 1, 28, 28).

    The final Tanh keeps every output pixel inside [-1, 1].
    """

    def __init__(self):
        super().__init__()
        # Fully connected projection: (N, 100) -> (N, 128 * 7 * 7).
        self.fc_layer = nn.Sequential(nn.Linear(latent_dim, 128 * 7 * 7))
        # Two x2 upsampling stages: 7x7 -> 14x14 -> 28x28.
        # momentum is passed by keyword on purpose: the second *positional*
        # argument of BatchNorm2d is eps, so ``BatchNorm2d(128, 0.8)`` would
        # set eps=0.8 instead of the intended momentum.
        self.conv_layer = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=2),                  # (N, 128, 14, 14)
            nn.Conv2d(128, 128, 3, stride=1, padding=1),
            nn.BatchNorm2d(128, momentum=0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Upsample(scale_factor=2),                  # (N, 128, 28, 28)
            nn.Conv2d(128, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64, momentum=0.8),
            nn.LeakyReLU(0.2, inplace=True),              # (N, 64, 28, 28)
            nn.Conv2d(64, 1, 3, stride=1, padding=1),
            nn.Tanh(),                                    # (N, 1, 28, 28)
        )

    def forward(self, input):
        """Return generated images for a batch of noise vectors ``input``."""
        x = self.fc_layer(input)                 # (N, 6272)
        x = x.view(input.shape[0], 128, 7, 7)    # (N, 128, 7, 7)
        return self.conv_layer(x)


class Discriminator(nn.Module):
    """Score (N, 1, 28, 28) images with a probability of shape (N, 1)."""

    def __init__(self):
        super().__init__()
        # Four stride-2 convolutions: 28 -> 14 -> 7 -> 4 -> 2.
        # The first convolution has no batch norm; the other three do.
        self.conv_layer = nn.Sequential(
            nn.Conv2d(1, 16, 3, 2, 1),        # (N, 16, 14, 14)
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout2d(0.25),
            nn.Conv2d(16, 32, 3, 2, 1),       # (N, 32, 7, 7)
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout2d(0.25),
            nn.BatchNorm2d(32, momentum=0.8),
            nn.Conv2d(32, 64, 3, 2, 1),       # (N, 64, 4, 4)
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout2d(0.25),
            nn.BatchNorm2d(64, momentum=0.8),
            nn.Conv2d(64, 128, 3, 2, 1),      # (N, 128, 2, 2)
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout2d(0.25),
            nn.BatchNorm2d(128, momentum=0.8),
        )
        # 128 * 2 * 2 = 512 flattened features -> one probability.
        self.fc_layer = nn.Sequential(
            nn.Linear(512, 1),
            nn.Sigmoid(),
        )

    def forward(self, input):
        """Return the per-image "real" probability for the batch ``input``."""
        x = self.conv_layer(input)           # (N, 128, 2, 2)
        x = x.view(input.shape[0], -1)       # (N, 512)
        return self.fc_layer(x)              # (N, 1)


def main():
    """Run the full training loop, saving samples and checkpoints per epoch."""
    # torchvision is only needed for the dataset and the image dump, so it is
    # imported here; the model classes stay importable without it.
    from torchvision import datasets
    from torchvision import transforms
    from torchvision.utils import save_image

    os.makedirs(sample_dir, exist_ok=True)

    # Normalize(0.5, 0.5) maps ToTensor's [0, 1] pixels to [-1, 1], the same
    # range the generator's Tanh produces.
    mnist_traindata = datasets.MNIST(
        data_root,
        train=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5]),
        ]),
        download=False,
    )
    mnist_train = DataLoader(mnist_traindata, batch_size=batch_size, shuffle=True)

    # Use the GPU when there is one instead of crashing on CPU-only machines.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    G = Generator().to(device)
    D = Discriminator().to(device)
    # To resume a previous run, load 'G_plus.ckpt' / 'D_plus.ckpt' into G / D
    # with load_state_dict(torch.load(...)) at this point.

    # The discriminator already ends in a Sigmoid, so plain BCELoss is used
    # (BCEWithLogitsLoss would apply a second Sigmoid).
    criteon = nn.BCELoss()
    G_optimizer = optim.Adam(G.parameters(), lr=learning_rate)
    D_optimizer = optim.Adam(D.parameters(), lr=learning_rate)

    print("start training")
    num_batches = len(mnist_train)
    for epoch in range(epochsize):
        D_loss_total = 0.0
        G_loss_total = 0.0
        total_num = 0
        # The dataset's class labels are not used.
        for batchidx, (realimage, _) in enumerate(mnist_train):
            realimage = realimage.to(device)
            # Size everything from the actual batch: the last one is smaller.
            num_images = realimage.size(0)
            realimage_label = torch.ones(num_images, 1, device=device)
            fakeimage_label = torch.zeros(num_images, 1, device=device)
            z = torch.randn(num_images, latent_dim, device=device)
            fakeimage = G(z)

            # ---- Discriminator step: loss on real plus loss on fake. ----
            # detach() keeps this backward pass from computing gradients
            # for the generator.
            d_realimage_loss = criteon(D(realimage), realimage_label)
            d_fakeimage_loss = criteon(D(fakeimage.detach()), fakeimage_label)
            D_loss = d_realimage_loss + d_fakeimage_loss
            D_optimizer.zero_grad()
            D_loss.backward()
            D_optimizer.step()
            # .item() stores a plain float; adding the tensor itself would
            # keep every batch's autograd graph alive for the whole epoch.
            D_loss_total += D_loss.item()

            # ---- Generator step: make D label the fakes as real. ----
            G_loss = criteon(D(fakeimage), realimage_label)
            G_optimizer.zero_grad()
            G_loss.backward()
            G_optimizer.step()
            G_loss_total += G_loss.item()

            total_num += num_images

            if batchidx % 300 == 0:
                print("batchidx:{}/{}, D_loss:{}, G_loss:{}, total_num:{},".format(
                    batchidx, num_batches, D_loss.item(), G_loss.item(), total_num))

        # Per-epoch summary (mean loss per batch).
        print('Epoch:{}/{}, D_loss:{}, G_loss:{}, total_num:{}'.format(
            epoch, epochsize, D_loss_total / num_batches,
            G_loss_total / num_batches, total_num))

        # Dump an 8x8 grid of samples; no gradients are needed for this.
        with torch.no_grad():
            samples = G(torch.randn(batch_size, latent_dim, device=device))
        save_image(samples[:64],
                   os.path.join(sample_dir,
                                'fake_images-{}.png'.format(epoch + sample_index_offset)),
                   nrow=8, normalize=True)

        # Save the network weights.
        torch.save(G.state_dict(), 'G_plus.ckpt')
        torch.save(D.state_dict(), 'D_plus.ckpt')


if __name__ == "__main__":
    main()
生成模型结果展示
DCGAN的训练效果比原始的GAN要好得多,一开始的图像就比较清晰
epoch1生成的图像
epoch5生成的图像
epoch10生成的图像
epoch20生成的图像
epoch50生成的图像
网络结构的探索
在实验过程中,我不止用了一个结构,还参考了别人构建出来的一些结构,但是发现GAN并没有那么容易训练起来,接连使用了多个结构,生成出来的图像均是噪声。以下均是失败的例子(第三个还未尝试)
1)结构1
# Generator (structure 1)
class Generator(nn.Module):
    """Turn (N, 100) noise into (N, 1, 28, 28) images.

    Two fully connected layers expand the noise to 128 * 7 * 7 features,
    which are reshaped to (N, 128, 7, 7) and upsampled by two transposed
    convolutions. The final Sigmoid keeps outputs inside (0, 1).
    """

    def __init__(self):
        super().__init__()
        flat_features = 128 * 7 * 7

        # (N, 100) -> (N, 128 * 7 * 7)
        dense_layers = [
            nn.Linear(100, 1024),
            nn.BatchNorm1d(1024),
            nn.LeakyReLU(),
            nn.Linear(1024, flat_features),
            nn.BatchNorm1d(flat_features),
            nn.LeakyReLU(),
        ]
        self.fc = nn.Sequential(*dense_layers)

        # (N, 128, 7, 7) -> (N, 64, 14, 14) -> (N, 1, 28, 28)
        upsampling_layers = [
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),
            nn.ConvTranspose2d(64, 1, kernel_size=4, stride=2, padding=1),
            nn.Sigmoid(),
        ]
        self.deconv = nn.Sequential(*upsampling_layers)

    def forward(self, input):
        """Generate one image per noise row of ``input``."""
        hidden = self.fc(input)                    # (N, 128 * 7 * 7)
        feature_map = hidden.view(-1, 128, 7, 7)   # (N, 128, 7, 7)
        return self.deconv(feature_map)            # (N, 1, 28, 28)


# Discriminator (structure 1)
class Discriminator(nn.Module):
    """Map (N, 1, 28, 28) images to one unbounded score each, shape (N, 1).

    There is no Sigmoid at the end, so the output is a raw score rather
    than a probability.
    """

    def __init__(self):
        super().__init__()

        # (N, 1, 28, 28) -> (N, 64, 14, 14) -> (N, 128, 7, 7)
        downsampling_layers = [
            nn.Conv2d(1, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
        ]
        self.conv = nn.Sequential(*downsampling_layers)

        # (N, 128 * 7 * 7) -> (N, 1024) -> (N, 1)
        scoring_layers = [
            nn.Linear(128 * 7 * 7, 1024),
            nn.BatchNorm1d(1024),
            nn.LeakyReLU(0.2),
            nn.Linear(1024, 1),
        ]
        self.fc = nn.Sequential(*scoring_layers)

    def forward(self, input):
        """Return the score of every image in ``input``."""
        features = self.conv(input)                  # (N, 128, 7, 7)
        flattened = features.view(-1, 128 * 7 * 7)   # (N, 128 * 7 * 7)
        return self.fc(flattened)                    # (N, 1)
生成结果
2)结构2
def _init_dcgan_weights(module):
    """Apply the DCGAN initialisation to a single layer.

    Convolution and transposed-convolution weights are drawn from
    N(0, 0.02). Batch-norm scale weights are drawn from N(1, 0.02) with a
    zero bias: centring the scale on 0 instead would shrink every
    normalised activation to almost nothing.
    """
    if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d)):
        nn.init.normal_(module.weight, 0.0, 0.02)
    elif isinstance(module, nn.BatchNorm2d):
        nn.init.normal_(module.weight, 1.0, 0.02)
        nn.init.constant_(module.bias, 0)


# Generator (structure 2)
class Generator(nn.Module):
    """Fully convolutional generator: 100-dim noise -> (N, 1, 28, 28) images.

    Accepts noise shaped either (N, 100) or (N, 100, 1, 1). The final Tanh
    keeps outputs inside [-1, 1].
    """

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.ConvTranspose2d(100, 224, 4, 2, 1, bias=False),  # (N, 224, 2, 2)
            nn.BatchNorm2d(224),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(224, 112, 4, 2, 1, bias=False),  # (N, 112, 4, 4)
            nn.BatchNorm2d(112),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(112, 56, 4, 2, 1, bias=False),   # (N, 56, 8, 8)
            nn.BatchNorm2d(56),
            nn.ReLU(inplace=True),
            # padding=2 here turns 8x8 into 14x14 instead of 16x16.
            nn.ConvTranspose2d(56, 28, 4, 2, 2, bias=False),    # (N, 28, 14, 14)
            nn.BatchNorm2d(28),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(28, 1, 4, 2, 1, bias=False),     # (N, 1, 28, 28)
            nn.Tanh(),
        )
        self.weight_init()

    def weight_init(self):
        """Re-initialise every layer of ``self.model`` the DCGAN way."""
        for m in self.model.modules():
            _init_dcgan_weights(m)

    def forward(self, input):
        """Generate images from ``input`` noise, (N, 100) or (N, 100, 1, 1)."""
        if input.dim() == 2:
            # The transposed convolutions need a spatial 1x1 map.
            input = input.unsqueeze(-1).unsqueeze(-1)   # (N, 100, 1, 1)
        return self.model(input)                        # (N, 1, 28, 28)


# Discriminator (structure 2)
class Discriminator(nn.Module):
    """Fully convolutional discriminator: (N, 1, 28, 28) -> (N, 1) probability."""

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, 28, 4, 2, 1, bias=False),      # (N, 28, 14, 14)
            # inplace=True does not change the result, it only saves memory.
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(28, 56, 4, 2, 1, bias=False),     # (N, 56, 7, 7)
            nn.BatchNorm2d(56),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(56, 112, 4, 2, 1, bias=False),    # (N, 112, 3, 3)
            nn.BatchNorm2d(112),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(112, 224, 4, 2, 1, bias=False),   # (N, 224, 1, 1)
            nn.BatchNorm2d(224),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(224, 1, 1, 1, 0, bias=False),     # (N, 1, 1, 1)
            nn.Sigmoid(),
        )
        self.weight_init()

    def weight_init(self):
        """Re-initialise every layer of ``self.model`` the DCGAN way.

        The shared helper matches nn.Conv2d as well as nn.ConvTranspose2d;
        testing only for ConvTranspose2d would skip every convolution in
        this network.
        """
        for m in self.model.modules():
            _init_dcgan_weights(m)

    def forward(self, input):
        """Return the "real" probability of every image in ``input``."""
        x = self.model(input)
        return x.view(input.size(0), -1)    # (N, 1)
生成结果
3)结构3
def _init_dcgan_weights(module):
    """Apply the DCGAN initialisation to a single layer.

    Convolution and transposed-convolution weights are drawn from
    N(0, 0.02). Batch-norm scale weights are drawn from N(1, 0.02) with a
    zero bias: centring the scale on 0 instead would shrink every
    normalised activation to almost nothing.
    """
    if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d)):
        nn.init.normal_(module.weight, 0.0, 0.02)
    elif isinstance(module, nn.BatchNorm2d):
        nn.init.normal_(module.weight, 1.0, 0.02)
        nn.init.constant_(module.bias, 0)


# Generator (structure 3)
class Generator(nn.Module):
    """Generator producing (N, 3, 64, 64) images from 100-dim noise.

    Accepts noise shaped either (N, 100) or (N, 100, 1, 1). The final Tanh
    keeps outputs inside [-1, 1].
    """

    def __init__(self):
        super().__init__()
        # One row per transposed convolution (kernel size is always 4):
        # (in_channels, out_channels, stride, padding, activation).
        # The first layer maps 1x1 to 4x4; each stride-2 layer then doubles
        # the spatial size: 4 -> 8 -> 16 -> 32 -> 64.
        layer_specs = [
            (100, 512, 1, 0, "R"),
            (512, 256, 2, 1, "R"),
            (256, 128, 2, 1, "R"),
            (128, 64, 2, 1, "R"),
            (64, 3, 2, 1, "tanh"),
        ]
        net = []
        for in_ch, out_ch, stride, padding, active in layer_specs:
            net.append(nn.ConvTranspose2d(in_channels=in_ch,
                                          out_channels=out_ch,
                                          kernel_size=4,
                                          stride=stride,
                                          padding=padding,
                                          bias=False))
            if active == "R":
                net.append(nn.BatchNorm2d(num_features=out_ch))
                net.append(nn.ReLU())
            elif active == "tanh":
                net.append(nn.Tanh())
        self.generator = nn.Sequential(*net)
        self.weight_init()

    def weight_init(self):
        """Re-initialise every layer of ``self.generator`` the DCGAN way."""
        for m in self.generator.modules():
            _init_dcgan_weights(m)

    def forward(self, x):
        """Generate images from noise ``x``, (N, 100) or (N, 100, 1, 1)."""
        if x.dim() == 2:
            # The transposed convolutions need a spatial 1x1 map.
            x = x.unsqueeze(-1).unsqueeze(-1)
        return self.generator(x)


# Discriminator (structure 3)
class Discriminator(nn.Module):
    """Discriminator mapping (N, 3, 64, 64) images to an (N, 1) probability."""

    def __init__(self):
        super().__init__()
        # One row per convolution (kernel size 4, stride 2):
        # (in_channels, out_channels, padding, activation).
        # Spatial size: 64 -> 32 -> 16 -> 8 -> 4 -> 1.
        layer_specs = [
            (3, 64, 1, "LR"),
            (64, 128, 1, "LR"),
            (128, 256, 1, "LR"),
            (256, 512, 1, "LR"),
            (512, 1, 0, "sigmoid"),
        ]
        net = []
        for i, (in_ch, out_ch, padding, active) in enumerate(layer_specs):
            net.append(nn.Conv2d(in_channels=in_ch,
                                 out_channels=out_ch,
                                 kernel_size=4,
                                 stride=2,
                                 padding=padding,
                                 bias=False))
            if i == 0:
                # The first convolution is not followed by batch norm.
                net.append(nn.LeakyReLU(0.2))
            elif active == "LR":
                net.append(nn.BatchNorm2d(num_features=out_ch))
                net.append(nn.LeakyReLU(0.2))
            elif active == "sigmoid":
                net.append(nn.Sigmoid())
        self.discriminator = nn.Sequential(*net)
        self.weight_init()

    def weight_init(self):
        """Re-initialise every layer of ``self.discriminator`` the DCGAN way.

        The shared helper matches nn.Conv2d as well as nn.ConvTranspose2d;
        testing only for ConvTranspose2d would skip every convolution in
        this network.
        """
        for m in self.discriminator.modules():
            _init_dcgan_weights(m)

    def forward(self, x):
        """Return the "real" probability of every image in ``x``."""
        out = self.discriminator(x)
        return out.view(x.size(0), -1)    # (N, 1)
此结构好像是原paper的结构,参考链接:https://zhuanlan.zhihu.com/p/89487141