Week G8: ACGAN Task
- 🍨 This post is a learning-record blog from the 🔗365天深度学习训练营 (365-day deep learning training camp)
- 🍖 Original author: K同学啊
This week's task:
Based on GAN, CGAN, SGAN, and their architecture diagrams, write the code for ACGAN.
Architecture Diagram
As the diagram shows, the first half of ACGAN resembles CGAN and the second half resembles SGAN. Accordingly, the first half of the code below follows CGAN and the second half follows SGAN.
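To make that split concrete, here is a toy sketch (hypothetical shapes, not the actual models defined below) of the interface that results from combining the two halves:

```python
import torch

B, latent_dim, n_classes = 4, 100, 3
z = torch.randn(B, latent_dim)             # random noise, as in any GAN
label = torch.randint(0, n_classes, (B,))  # class condition, as in CGAN

# Once the models below are defined:
#   gen_imgs           = generator((z, label))    # CGAN-style conditional input
#   validity, cls_pred = discriminator(gen_imgs)  # SGAN-style dual output
```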
Setup Code
```python
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from torchvision.utils import save_image
from torchvision.utils import make_grid
from torch.utils.tensorboard import SummaryWriter
import matplotlib.pyplot as plt
import datetime
import argparse

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 128  # used by the DataLoader below (Args.batch_size is not used)
```
We first define the basic hyperparameters as plain class attributes; this serves the same purpose as `parser = argparse.ArgumentParser()`.
```python
class Args:
    n_epochs = 200
    batch_size = 64
    lr = 0.0002
    b1 = 0.5
    b2 = 0.999
    n_cpu = 8
    latent_dim = 100
    img_size = 128        # depends on the image type
    channels = 3          # depends on the image type
    num_classes = 3       # rps has three classes; used by the discriminator below
    sample_interval = 400

opt = Args()
print(opt)
```
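For reference, a sketch of the equivalent argparse version (only meaningful when running as a standalone script, which is why the Args class is used here instead; the final line is left commented out for notebook use):

```python
parser = argparse.ArgumentParser()
parser.add_argument("--n_epochs", type=int, default=200)
parser.add_argument("--batch_size", type=int, default=64)
parser.add_argument("--lr", type=float, default=0.0002)
parser.add_argument("--b1", type=float, default=0.5)
parser.add_argument("--b2", type=float, default=0.999)
parser.add_argument("--n_cpu", type=int, default=8)
parser.add_argument("--latent_dim", type=int, default=100)
parser.add_argument("--img_size", type=int, default=128)
parser.add_argument("--channels", type=int, default=3)
parser.add_argument("--num_classes", type=int, default=3)
parser.add_argument("--sample_interval", type=int, default=400)
# opt = parser.parse_args()
```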
```python
train_transform = transforms.Compose([
    transforms.Resize(128),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

train_dataset = datasets.ImageFolder(root="F:/365data/G3/rps/", transform=train_transform)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=6)
```
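ImageFolder derives the class labels from the subfolder names, and those indices are exactly what the conditional generator will be trained on, so it is worth printing the mapping once (the mapping in the comment is an assumption about the rps folder layout):

```python
print(train_dataset.class_to_idx)  # e.g. {'paper': 0, 'rock': 1, 'scissors': 2}
print(len(train_dataset))          # number of training images
```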
```python
def show_images(images):
    fig, ax = plt.subplots(figsize=(20, 20))
    ax.set_xticks([]); ax.set_yticks([])
    ax.imshow(make_grid(images.detach(), nrow=22).permute(1, 2, 0))

def show_batch(dl):
    for images, _ in dl:
        show_images(images)
        break
```
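An optional sanity check before training is to preview one batch with the helper just defined:

```python
show_batch(train_loader)
plt.show()
```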
```python
image_shape = (3, 128, 128)
image_dim = int(np.prod(image_shape))
latent_dim = 100
n_classes = 3
embedding_dim = 100
```
```python
# Custom weight-initialization function, applied to both the generator
# and the discriminator
def weights_init(m):
    # Get the class name of the current layer
    classname = m.__class__.__name__
    # Convolutional layers: normal init with mean 0, std 0.02
    if classname.find('Conv') != -1:
        torch.nn.init.normal_(m.weight, 0.0, 0.02)
    # BatchNorm layers: weights ~ N(1, 0.02), biases set to zero
    elif classname.find('BatchNorm') != -1:
        torch.nn.init.normal_(m.weight, 1.0, 0.02)
        torch.nn.init.zeros_(m.bias)
```
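A quick way to confirm the initializer does what the comments claim is to run it on a throwaway layer (my addition, not part of the original):

```python
conv = nn.Conv2d(3, 16, 3)
weights_init(conv)
print(conv.weight.mean().item(), conv.weight.std().item())  # roughly 0.0 and 0.02
```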
```python
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()

        # Conditioning branch: embed the class label (n_classes labels,
        # embedding_dim-dimensional embeddings), then project it to 16
        # values so it can be reshaped into a 1x4x4 feature map.
        self.label_conditioned_generator = nn.Sequential(
            nn.Embedding(n_classes, embedding_dim),
            nn.Linear(embedding_dim, 16)
        )

        # Latent branch: project the latent_dim-dimensional noise vector
        # to a 4x4x512 volume.
        self.latent = nn.Sequential(
            nn.Linear(latent_dim, 4*4*512),
            nn.LeakyReLU(0.2, inplace=True)
        )

        # Main generator: five transposed convolutions upsample the
        # concatenated 513x4x4 volume into a 3x128x128 RGB image.
        self.model = nn.Sequential(
            # Transposed conv 1: 513x4x4 -> 512x8x8
            nn.ConvTranspose2d(513, 64*8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*8, momentum=0.1, eps=0.8),
            nn.ReLU(True),
            # Transposed conv 2: 512x8x8 -> 256x16x16
            nn.ConvTranspose2d(64*8, 64*4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*4, momentum=0.1, eps=0.8),
            nn.ReLU(True),
            # Transposed conv 3: 256x16x16 -> 128x32x32
            nn.ConvTranspose2d(64*4, 64*2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*2, momentum=0.1, eps=0.8),
            nn.ReLU(True),
            # Transposed conv 4: 128x32x32 -> 64x64x64
            nn.ConvTranspose2d(64*2, 64*1, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*1, momentum=0.1, eps=0.8),
            nn.ReLU(True),
            # Transposed conv 5: 64x64x64 -> 3x128x128;
            # Tanh maps the pixel values into [-1, 1]
            nn.ConvTranspose2d(64*1, 3, 4, 2, 1, bias=False),
            nn.Tanh()
        )

    def forward(self, inputs):
        noise_vector, label = inputs
        # Embed the label and reshape it to (batch_size, 1, 4, 4)
        label_output = self.label_conditioned_generator(label)
        label_output = label_output.view(-1, 1, 4, 4)
        # Project the noise and reshape it to (batch_size, 512, 4, 4)
        latent_output = self.latent(noise_vector)
        latent_output = latent_output.view(-1, 512, 4, 4)
        # Concatenate label map and latent map along the channel dimension,
        # then decode the 513-channel volume into an RGB image
        concat = torch.cat((latent_output, label_output), dim=1)
        image = self.model(concat)
        return image
```
```python
generator = Generator().to(device)
generator.apply(weights_init)
print(generator)
```
```python
from torchinfo import summary
summary(generator)
```
```
=================================================================
Layer (type:depth-idx) Param #
=================================================================
Generator --
├─Sequential: 1-1 --
│ └─Embedding: 2-1 300
│ └─Linear: 2-2 1,616
├─Sequential: 1-2 --
│ └─Linear: 2-3 827,392
│ └─LeakyReLU: 2-4 --
├─Sequential: 1-3 --
│ └─ConvTranspose2d: 2-5 4,202,496
│ └─BatchNorm2d: 2-6 1,024
│ └─ReLU: 2-7 --
│ └─ConvTranspose2d: 2-8 2,097,152
│ └─BatchNorm2d: 2-9 512
│ └─ReLU: 2-10 --
│ └─ConvTranspose2d: 2-11 524,288
│ └─BatchNorm2d: 2-12 256
│ └─ReLU: 2-13 --
│ └─ConvTranspose2d: 2-14 131,072
│ └─BatchNorm2d: 2-15 128
│ └─ReLU: 2-16 --
│ └─ConvTranspose2d: 2-17 3,072
│ └─Tanh: 2-18 --
=================================================================
Total params: 7,789,308
Trainable params: 7,789,308
Non-trainable params: 0
=================================================================
```
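Before wiring up the discriminator, a small smoke test (my addition) confirms that one forward pass yields images of the 3x128x128 shape the transforms above produce:

```python
z = torch.randn(16, latent_dim, device=device)
test_labels = torch.randint(0, n_classes, (16,), device=device)
with torch.no_grad():
    sample = generator((z, test_labels))
print(sample.shape)  # expected: torch.Size([16, 3, 128, 128])
```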
Everything up to this point, ending with the generator, essentially mirrors the first half of the CGAN code; what follows is adapted from SGAN.
```python
class Discriminator(nn.Module):
    def __init__(self, in_channels=3):
        super(Discriminator, self).__init__()

        def discriminator_block(in_filters, out_filters, bn=True):
            """Returns the layers of each discriminator block"""
            block = [nn.Conv2d(in_filters, out_filters, 3, 2, 1),
                     nn.LeakyReLU(0.2, inplace=True),
                     nn.Dropout2d(0.25)]
            if bn:
                block.append(nn.BatchNorm2d(out_filters, 0.8))
            return block

        self.conv_blocks = nn.Sequential(
            *discriminator_block(opt.channels, 16, bn=False),
            *discriminator_block(16, 32),
            *discriminator_block(32, 64),
            *discriminator_block(64, 128),
        )

        # The height and width of the downsampled image
        ds_size = opt.img_size // 2 ** 4

        # Output layers: an adversarial real/fake head, plus an auxiliary
        # classification head over num_classes + 1 classes (the extra class
        # stands for "fake"). The Softmax here follows the SGAN code this is
        # modeled on, although CrossEntropyLoss normally expects raw logits.
        self.adv_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, 1), nn.Sigmoid())
        self.aux_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, opt.num_classes + 1),
                                       nn.Softmax(dim=1))

    def forward(self, img):
        out = self.conv_blocks(img)
        out = out.view(out.shape[0], -1)
        validity = self.adv_layer(out)
        label = self.aux_layer(out)
        return validity, label
```
```python
discriminator = Discriminator().to(device)
discriminator.apply(weights_init)
print(discriminator)
summary(discriminator)
```
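The same kind of smoke test for the discriminator: with img_size = 128 and num_classes = 3, the two heads should return shapes (B, 1) and (B, 4):

```python
dummy = torch.randn(8, 3, opt.img_size, opt.img_size, device=device)
with torch.no_grad():
    validity, cls_pred = discriminator(dummy)
print(validity.shape, cls_pred.shape)  # torch.Size([8, 1]) torch.Size([8, 4])
```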
```python
adversarial_loss = nn.BCELoss()
auxiliary_loss = torch.nn.CrossEntropyLoss()

def generator_loss(fake_output, label):
    gen_loss = adversarial_loss(fake_output, label)
    return gen_loss

def discriminator_loss(output, label):
    disc_loss = adversarial_loss(output, label)
    return disc_loss

cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if cuda else torch.LongTensor

learning_rate = 0.0002

optimizer_G = optim.Adam(generator.parameters(), lr=learning_rate, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=learning_rate, betas=(0.5, 0.999))
```
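Written out (my notation), the objectives the loop below minimizes are

$$\mathcal{L}_G = \mathrm{BCE}\big(D_{\mathrm{adv}}(G(z, c)),\, 1\big)$$

$$\mathcal{L}_D = \frac{1}{2}\left[\frac{\mathrm{BCE}(D_{\mathrm{adv}}(x),\,1) + \mathrm{CE}(D_{\mathrm{aux}}(x),\,c)}{2} + \frac{\mathrm{BCE}(D_{\mathrm{adv}}(\hat{x}),\,0) + \mathrm{CE}(D_{\mathrm{aux}}(\hat{x}),\,c_{\mathrm{fake}})}{2}\right]$$

where $\hat{x} = G(z, c)$ is detached from the generator graph and $c_{\mathrm{fake}} = $ `opt.num_classes` is the extra "fake" class. Note that, unlike the formulation in the original ACGAN paper, the generator here is trained with the adversarial term only.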
```python
import os
os.makedirs("images", exist_ok=True)  # save_image does not create the folder itself

for epoch in range(opt.n_epochs):
    for i, (imgs, labels) in enumerate(train_loader):

        batch_size = imgs.shape[0]

        # Adversarial ground truths; fake_aux_gt is the extra "fake" class index
        valid = Variable(FloatTensor(batch_size, 1).fill_(1.0), requires_grad=False)
        fake = Variable(FloatTensor(batch_size, 1).fill_(0.0), requires_grad=False)
        fake_aux_gt = Variable(LongTensor(batch_size).fill_(opt.num_classes), requires_grad=False)

        # Configure input
        real_imgs = Variable(imgs.type(FloatTensor))
        labels = Variable(labels.type(LongTensor))

        # -----------------
        #  Train Generator
        # -----------------

        optimizer_G.zero_grad()

        # Sample noise and labels as generator input
        z = Variable(FloatTensor(np.random.normal(0, 1, (batch_size, opt.latent_dim))))

        # Generate a batch of images
        gen_imgs = generator((z, labels))

        # Loss measures generator's ability to fool the discriminator
        validity, _ = discriminator(gen_imgs)
        g_loss = adversarial_loss(validity, valid)

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------

        optimizer_D.zero_grad()

        # Loss for real images: adversarial term plus class term
        real_pred, real_aux = discriminator(real_imgs)
        d_real_loss = (adversarial_loss(real_pred, valid) + auxiliary_loss(real_aux, labels)) / 2

        # Loss for fake images: their class target is the extra "fake" class
        fake_pred, fake_aux = discriminator(gen_imgs.detach())
        d_fake_loss = (adversarial_loss(fake_pred, fake) + auxiliary_loss(fake_aux, fake_aux_gt)) / 2

        # Total discriminator loss
        d_loss = (d_real_loss + d_fake_loss) / 2

        # Calculate discriminator accuracy on the auxiliary classifier
        pred = np.concatenate([real_aux.data.cpu().numpy(), fake_aux.data.cpu().numpy()], axis=0)
        gt = np.concatenate([labels.data.cpu().numpy(), fake_aux_gt.data.cpu().numpy()], axis=0)
        d_acc = np.mean(np.argmax(pred, axis=1) == gt)

        d_loss.backward()
        optimizer_D.step()

        batches_done = epoch * len(train_loader) + i
        if batches_done % opt.sample_interval == 0:
            save_image(gen_imgs.data[:25], "images/%d.png" % batches_done, nrow=5, normalize=True)

    print("[Epoch %d/%d] [Batch %d/%d] [D loss: %f, acc: %d%%] [G loss: %f]"
          % (epoch, opt.n_epochs, i, len(train_loader), d_loss.item(), 100 * d_acc, g_loss.item()))
```
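Once training has produced a usable generator, class-conditional sampling is just a matter of fixing the label input. A minimal sketch (the output file name is mine; the class index follows train_dataset.class_to_idx):

```python
generator.eval()
with torch.no_grad():
    z = FloatTensor(np.random.normal(0, 1, (25, opt.latent_dim)))
    target = LongTensor(25).fill_(0)   # condition every sample on class 0
    samples = generator((z, target))
save_image(samples.data, "images/class0_samples.png", nrow=5, normalize=True)
```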
Training Log

```
[Epoch 0/50] [Batch 19/20] [D loss: 1.455980, acc: 40%] [G loss: 0.490490]
[Epoch 1/50] [Batch 19/20] [D loss: 1.222127, acc: 72%] [G loss: 0.681366]
[Epoch 2/50] [Batch 19/20] [D loss: 1.224287, acc: 63%] [G loss: 0.916321]
[Epoch 3/50] [Batch 19/20] [D loss: 1.111225, acc: 70%] [G loss: 1.007028]
[Epoch 4/50] [Batch 19/20] [D loss: 1.184606, acc: 75%] [G loss: 0.696607]
[Epoch 5/50] [Batch 19/20] [D loss: 1.352154, acc: 55%] [G loss: 0.747507]
[Epoch 6/50] [Batch 19/20] [D loss: 1.403305, acc: 52%] [G loss: 0.869919]
[Epoch 7/50] [Batch 19/20] [D loss: 1.311451, acc: 50%] [G loss: 0.880048]
[Epoch 8/50] [Batch 19/20] [D loss: 1.413715, acc: 50%] [G loss: 0.674482]
[Epoch 9/50] [Batch 19/20] [D loss: 1.326531, acc: 54%] [G loss: 0.609503]
[Epoch 10/50] [Batch 19/20] [D loss: 1.449468, acc: 48%] [G loss: 0.620321]
[Epoch 11/50] [Batch 19/20] [D loss: 1.367987, acc: 53%] [G loss: 0.717428]
[Epoch 12/50] [Batch 19/20] [D loss: 1.286323, acc: 55%] [G loss: 0.748294]
[Epoch 13/50] [Batch 19/20] [D loss: 1.374772, acc: 51%] [G loss: 0.849943]
[Epoch 14/50] [Batch 19/20] [D loss: 1.303872, acc: 55%] [G loss: 0.887458]
[Epoch 15/50] [Batch 19/20] [D loss: 1.338245, acc: 59%] [G loss: 0.566128]
[Epoch 16/50] [Batch 19/20] [D loss: 1.386614, acc: 59%] [G loss: 0.737729]
[Epoch 17/50] [Batch 19/20] [D loss: 1.378518, acc: 55%] [G loss: 0.559435]
[Epoch 18/50] [Batch 19/20] [D loss: 1.421224, acc: 53%] [G loss: 0.639280]
[Epoch 19/50] [Batch 19/20] [D loss: 1.314460, acc: 54%] [G loss: 0.695454]
[Epoch 20/50] [Batch 19/20] [D loss: 1.279016, acc: 56%] [G loss: 0.810150]
[Epoch 21/50] [Batch 19/20] [D loss: 1.364004, acc: 53%] [G loss: 0.736294]
[Epoch 22/50] [Batch 19/20] [D loss: 1.364638, acc: 52%] [G loss: 0.990328]
[Epoch 23/50] [Batch 19/20] [D loss: 1.322828, acc: 53%] [G loss: 0.731904]
[Epoch 24/50] [Batch 19/20] [D loss: 1.317570, acc: 50%] [G loss: 0.839391]
[Epoch 25/50] [Batch 19/20] [D loss: 1.330042, acc: 55%] [G loss: 0.755845]
[Epoch 26/50] [Batch 19/20] [D loss: 1.354234, acc: 55%] [G loss: 0.652750]
[Epoch 27/50] [Batch 19/20] [D loss: 1.383858, acc: 55%] [G loss: 0.677340]
[Epoch 28/50] [Batch 19/20] [D loss: 1.384538, acc: 52%] [G loss: 0.621817]
[Epoch 29/50] [Batch 19/20] [D loss: 1.314232, acc: 54%] [G loss: 0.783550]
[Epoch 30/50] [Batch 19/20] [D loss: 1.328900, acc: 54%] [G loss: 0.709978]
[Epoch 31/50] [Batch 19/20] [D loss: 1.326728, acc: 54%] [G loss: 0.804180]
[Epoch 32/50] [Batch 19/20] [D loss: 1.346232, acc: 52%] [G loss: 0.775322]
[Epoch 33/50] [Batch 19/20] [D loss: 1.290386, acc: 56%] [G loss: 0.939839]
[Epoch 34/50] [Batch 19/20] [D loss: 1.395943, acc: 50%] [G loss: 0.582599]
[Epoch 35/50] [Batch 19/20] [D loss: 1.394045, acc: 52%] [G loss: 0.716685]
[Epoch 36/50] [Batch 19/20] [D loss: 1.391289, acc: 51%] [G loss: 0.747493]
[Epoch 37/50] [Batch 19/20] [D loss: 1.369082, acc: 50%] [G loss: 0.719075]
[Epoch 38/50] [Batch 19/20] [D loss: 1.401712, acc: 53%] [G loss: 0.645679]
[Epoch 39/50] [Batch 19/20] [D loss: 1.279735, acc: 57%] [G loss: 0.710965]
[Epoch 40/50] [Batch 19/20] [D loss: 1.363157, acc: 56%] [G loss: 0.589386]
[Epoch 41/50] [Batch 19/20] [D loss: 1.334075, acc: 53%] [G loss: 0.774654]
[Epoch 42/50] [Batch 19/20] [D loss: 1.358592, acc: 51%] [G loss: 0.726460]
[Epoch 43/50] [Batch 19/20] [D loss: 1.389814, acc: 50%] [G loss: 0.703020]
[Epoch 44/50] [Batch 19/20] [D loss: 1.363462, acc: 53%] [G loss: 0.691942]
[Epoch 45/50] [Batch 19/20] [D loss: 1.362092, acc: 55%] [G loss: 0.727146]
[Epoch 46/50] [Batch 19/20] [D loss: 1.360469, acc: 53%] [G loss: 0.696875]
[Epoch 47/50] [Batch 19/20] [D loss: 1.385563, acc: 52%] [G loss: 0.661834]
[Epoch 48/50] [Batch 19/20] [D loss: 1.376729, acc: 50%] [G loss: 0.753325]
[Epoch 49/50] [Batch 19/20] [D loss: 1.370506, acc: 51%] [G loss: 0.687326]
```
Summary

- From the architecture diagram, ACGAN is the combination of CGAN and SGAN.
- Consequently, the code assembled from those two halves also runs successfully.
- In other words, ACGAN takes random noise z plus conditional information C as input, and its discriminator outputs both a real/fake validity score and a class prediction (class 1, class 2, ..., or fake), which is exactly the classifier role inherited from SGAN.