VGG在我之前的博客中已经做过详解,详情见:
https://blog.csdn.net/muye_IT/article/details/123797416
代码已提交github,详情见(麻烦Star!):
https://github.com/Jasper0420/Deep-Learning-Practice-VGG
1. VGG16
VGG网络有多个版本,一般常用的是VGG-16模型,其网络结构如下图所示:
2. 使用PyTorch搭建VGG网络
2.1 model.py
VGG网络分为 卷积层提取特征 和 全连接层进行分类 这两个模块
import torch
import torch.nn as nn


class VGG(nn.Module):
    """VGG-style image classifier: convolutional features followed by an MLP head.

    Args:
        features: Module used as the convolutional feature extractor. Its
            output is flattened and must contain ``512 * 7 * 7`` values per
            sample (e.g. ``N x 512 x 7 x 7`` for ``N x 3 x 224 x 224`` input).
        num_classes: Number of output logits.
        init_weights: When true, re-initialise every ``Conv2d`` and ``Linear``
            layer with Xavier-uniform weights and zero biases.
    """

    def __init__(self, features, num_classes=1000, init_weights=False):
        super().__init__()
        flat_dim = 512 * 7 * 7
        hidden_dim = 2048

        # Convolutional part that extracts the feature maps.
        self.features = features
        # Three fully connected layers that perform the classification.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(flat_dim, hidden_dim),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(True),
            nn.Linear(hidden_dim, num_classes),
        )

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return class logits for the input batch ``x``."""
        feature_maps = self.features(x)
        # Keep the batch dimension, collapse everything else.
        flattened = torch.flatten(feature_maps, start_dim=1)
        return self.classifier(flattened)

    def _initialize_weights(self):
        """Apply Xavier-uniform weights and zero biases to conv/linear layers."""
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.xavier_uniform_(layer.weight)
                # A convolution may have been built with bias=False.
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight)
                nn.init.constant_(layer.bias, 0)
但是VGG网络有 VGG-13、VGG-16等多种网络结构,我们能不能将这几种结构通过代码集合成一个模型呢?
以上图的A、B、D、E模型为例,其全连接层完全一样,各阶段的卷积核个数(通道数)也相同,只是每个阶段中卷积层的层数不同。
# VGG layer configurations. An integer is the number of output channels of a
# convolution layer; 'M' stands for a max-pooling layer.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],  # model A
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],  # model B
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],  # model D
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],  # model E
}


def make_features(cfg: list):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: One of the lists stored in ``cfgs``: integers add a 3x3
            convolution (padding 1) followed by an in-place ReLU, ``"M"`` adds
            a 2x2 max-pooling layer with stride 2.

    Returns:
        An ``nn.Sequential`` holding the layers in configuration order.
    """
    layers = []
    in_channels = 3  # the raw input image has three (RGB) channels
    for v in cfg:
        if v == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            layers.append(nn.Conv2d(in_channels, v, kernel_size=3, padding=1))
            layers.append(nn.ReLU(True))
            # The next convolution consumes what this one produces.
            in_channels = v
    return nn.Sequential(*layers)


def vgg(model_name="vgg16", **kwargs):
    """Create a ``VGG`` model for the named configuration.

    Args:
        model_name: Key into ``cfgs`` (``"vgg11"``, ``"vgg13"``, ``"vgg16"``
            or ``"vgg19"``).
        **kwargs: Forwarded unchanged to the ``VGG`` constructor.

    Returns:
        The constructed ``VGG`` instance.

    Raises:
        SystemExit: With exit code -1 (after printing a warning) when
            ``model_name`` is not a usable key of ``cfgs``.
    """
    try:
        cfg = cfgs[model_name]
    except (KeyError, TypeError):
        # Only lookup failures are handled here; a bare ``except`` would also
        # swallow unrelated errors such as KeyboardInterrupt.
        print("Warning: model number {} not in cfgs dict!".format(model_name))
        # Raise SystemExit directly: the ``exit`` helper is injected by the
        # ``site`` module and is not guaranteed to exist.
        raise SystemExit(-1) from None
    return VGG(make_features(cfg), **kwargs)
扩展:Python 函数参数前面一个星号(*)和两个星号(**)的区别
在 Python 的函数中经常能看到输入的参数前面有一个或者两个星号,例如:
def foo(param1, *param2): def bar(param1, **param2):
这两种用法其实都是用来将任意个数的参数导入到 Python 函数中。
单星号(*):*args
将所有参数以元组(tuple)的形式导入:
def foo(param1, *param2):
    """Print ``param1``, then the tuple of remaining positional arguments."""
    print(param1, param2, sep="\n")


foo(1, 2, 3, 4, 5)
以上代码输出结果为:
1 (2, 3, 4, 5)
双星号(**):**kwargs
双星号(**)将参数以字典的形式导入:
def bar(param1, **param2):
    """Print ``param1``, then the dict of keyword arguments that were passed."""
    print(param1, param2, sep="\n")


bar(1, a=2, b=3)
以上代码输出结果为:
1 {'a': 2, 'b': 3}
此外,单星号的另一个用法是解压参数列表:
def foo(runoob_1, runoob_2):
    """Print both arguments on one line, separated by a single space."""
    print(f"{runoob_1!s} {runoob_2!s}")


l = [1, 2]
# The single star unpacks the list into two positional arguments.
foo(*l)
以上代码输出结果为:
1 2
当然这两个用法可以同时出现在一个函数中:
def foo(a, b=10, *args, **kwargs):
    """Print ``a``, ``b``, the extra positional tuple and the keyword dict, one per line."""
    for value in (a, b, args, kwargs):
        print(value)


foo(1, 2, 3, 4, e=5, f=6, g=7)
以上代码输出结果为:
1 2 (3, 4) {'e': 5, 'f': 6, 'g': 7}
2.2 train.py
训练脚本跟上一篇AlexNet基本一致,需要注意的是实例化网络的过程:
# Name of the VGG configuration to instantiate.
model_name = "vgg16"
# Build the network through the vgg() factory with 5 output classes and
# weight initialisation switched on.
net = vgg(model_name=model_name, num_classes=5, init_weights=True)
函数调用关系:
net = vgg(model_name="vgg16", num_classes=5, init_weights=True)
  -> cfg = cfgs[model_name] = cfgs["vgg16"] = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
  -> model = VGG(make_features(cfg), num_classes=5, init_weights=True)
       -> make_features(cfg: list)
2.3 train.py 详解
train.py ——加载数据集并训练,训练集计算loss,测试集计算accuracy,保存训练好的网络参数
2.3.1 相关包的加载
import os import sys import json import torch import torch.nn as nn from torchvision import transforms, datasets import torch.optim as optim from tqdm import tqdm from model import vgg
2.3.2 数据预处理
# Pre-processing pipelines, keyed by dataset split.
data_transform = {
    "train": transforms.Compose([transforms.RandomResizedCrop(224),  # random crop
                                 transforms.RandomHorizontalFlip(),  # random horizontal flip
                                 transforms.ToTensor(),  # convert to Tensor format
                                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),  # normalisation
    "val": transforms.Compose([transforms.Resize((224, 224)),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}
2.3.3 加载训练集
# Locate the image data set: two directory levels above the current working
# directory, under data_set/flower_data.
data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

# Load the training set and attach the training pre-processing pipeline.
train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                     transform=data_transform["train"])
train_num = len(train_dataset)

# To make the labels easy to read back at prediction time, store an
# index -> class-name mapping in a JSON file.
# class_to_idx maps class name -> index, e.g.
# {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
flower_list = train_dataset.class_to_idx
# Swap keys and values so the mapping goes from index to class name.
cla_dict = {index: name for name, index in flower_list.items()}
# Write the inverted mapping to the JSON file.
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w', encoding='utf-8') as json_file:
    json_file.write(json_str)

batch_size = 64
nw = 0  # number of workers
print('Using {} dataloader workers every process'.format(nw))

# Serve the training set in shuffled batches of batch_size samples.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size, shuffle=True,
                                           num_workers=nw)
2.3.4 加载验证集
# Load the validation set with the validation pre-processing pipeline.
validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                        transform=data_transform["val"])
val_num = len(validate_dataset)
# Validation batches are served in a fixed order (no shuffling), 4 samples at a time.
validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                              batch_size=4, shuffle=False,
                                              num_workers=nw)
2.3.5 训练网络与验证网络
# NOTE(review): this excerpt uses `device` and calls `main()` but defines
# neither; presumably it is the body of main() from the complete script --
# confirm before running it on its own.
model_name = "vgg16"
net = vgg(model_name=model_name, num_classes=5, init_weights=True)
net.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)

epochs = 30
best_acc = 0.0  # best validation accuracy seen so far
save_path = './{}Net.pth'.format(model_name)
train_steps = len(train_loader)  # number of batches per epoch
for epoch in range(epochs):
    # train
    net.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, file=sys.stdout)
    for step, data in enumerate(train_bar):
        images, labels = data
        optimizer.zero_grad()
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        # NOTE(review): the loss of a network in training mode presumably
        # already requires grad, which would make this call redundant -- confirm.
        loss = loss.requires_grad_(True)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()

        train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                 epochs,
                                                                 loss)

    # validate
    net.eval()
    acc = 0.0  # accumulate accurate number / epoch
    with torch.no_grad():
        val_bar = tqdm(validate_loader, file=sys.stdout)
        for val_data in val_bar:
            val_images, val_labels = val_data
            outputs = net(val_images.to(device))
            # Index of the largest logit is the predicted class.
            predict_y = torch.max(outputs, dim=1)[1]
            acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

    val_accurate = acc / val_num
    print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
          (epoch + 1, running_loss / train_steps, val_accurate))

    # Keep only the weights of the best-performing epoch.
    if val_accurate > best_acc:
        best_acc = val_accurate
        torch.save(net.state_dict(), save_path)

print('Finished Training')
# NOTE(review): the original indentation was lost, so whether this cache
# release ran once after training or once per epoch cannot be told -- confirm.
if hasattr(torch.cuda, 'empty_cache'):
    torch.cuda.empty_cache()

if __name__ == '__main__':
    main()
2.3.6 完整代码
import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm

from model import vgg


def _train_one_epoch(net, loader, loss_function, optimizer, device, epoch, epochs):
    """Run one optimisation pass over ``loader`` and return the mean batch loss."""
    net.train()
    running_loss = 0.0
    train_bar = tqdm(loader, file=sys.stdout)
    for images, labels in train_bar:
        optimizer.zero_grad()
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        # No requires_grad_() call here: forcing it would hide a detached
        # graph by making backward() a silent no-op.
        loss.backward()
        optimizer.step()

        # Convert to a Python float once and reuse it for both the running
        # total and the progress-bar text.
        batch_loss = loss.item()
        running_loss += batch_loss
        train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                 epochs,
                                                                 batch_loss)
    return running_loss / len(loader)


def _evaluate(net, loader, device):
    """Return how many samples of ``loader`` the network classifies correctly."""
    net.eval()
    correct = 0.0
    with torch.no_grad():
        for val_images, val_labels in tqdm(loader, file=sys.stdout):
            outputs = net(val_images.to(device))
            # Index of the largest logit is the predicted class.
            predict_y = torch.max(outputs, dim=1)[1]
            correct += torch.eq(predict_y, val_labels.to(device)).sum().item()
    return correct


def main():
    """Train VGG16 on the flower data set and keep the best-scoring weights.

    Reads images from ``<cwd>/../../data_set/flower_data/{train,val}``, writes
    the index -> class-name mapping to ``class_indices.json`` and saves the
    weights of the epoch with the highest validation accuracy to
    ``./vgg16Net.pth``.

    Raises:
        AssertionError: If the data set directory does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # class_to_idx maps class name -> index, e.g.
    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # Invert it so the prediction script can look a class name up by index.
    cla_dict = {index: name for name, index in flower_list.items()}
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w', encoding='utf-8') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to a single worker instead of letting min() raise TypeError.
    nw = min(os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8)  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    best_acc = 0.0
    save_path = './{}Net.pth'.format(model_name)
    for epoch in range(epochs):
        train_loss = _train_one_epoch(net, train_loader, loss_function,
                                      optimizer, device, epoch, epochs)
        val_accurate = _evaluate(net, validate_loader, device) / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, train_loss, val_accurate))

        # Keep only the weights of the best-performing epoch.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
    # Release cached GPU memory once training is over; nothing to do on CPU.
    if device.type == "cuda":
        torch.cuda.empty_cache()


if __name__ == '__main__':
    main()