猫狗大战
前言
这是一次课程大作业,最近花了两三天完成了模型训练,并搭建起了一个可用的服务。
作业内容就是猫狗大战(猫狗数据集分类),要求是用tensorflow和pytorch分别实现。这本来是几年前kaggle中的一个竞赛,原本数据集有800多M,但是我为了省训练时间,从网上找了一个“阉割版”的数据集,一共就3000张图片。具体的下载方式在后面的代码中会提到。
环境方面,受限于显卡算力,PyTorch的版本比较低:
python3.8
TF==2.2
Pytorch==1.2
1. TensorFlow版
# Standard library
import os
import random

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPool2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# The figures below use Chinese titles and labels: select the SimHei font for
# them and disable the Unicode minus sign on the axes.
plt.rcParams['font.sans-serif'] = ['simhei']
plt.rcParams['axes.unicode_minus'] = False
1.1 获取数据集
# On the first run, uncomment the two assignments below to download the data.
# Dataset download link:
#dataset_url = "https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip"
# Download the archive and extract it into the configured folder:
#dataset_path = tf.keras.utils.get_file("cats_and_dogs_filtered.zip", origin=dataset_url,cache_subdir="/home/a/桌面/python相关/人工智能期末作业-猫狗大战分类",extract=True)

# Folder that contains the extracted "cats_and_dogs_filtered" directory.
project_root = os.path.dirname('/home/a/桌面/python相关/人工智能期末作业-猫狗大战分类/')
dataset_dir = os.path.join(project_root, "cats_and_dogs_filtered")
第一次运行的时候将注释的几行代码取消注释就可以下载数据集了。
1.2 载入划分训练集,并且构造数据生成器
# Root folders of the two splits; the "validation" folder is used as the
# test set throughout.
train_dir = os.path.join(dataset_dir, "train")
test_dir = os.path.join(dataset_dir, "validation")

# One sub-folder per label inside each split.
train_cats = os.path.join(train_dir, "cats")
train_dogs = os.path.join(train_dir, "dogs")
test_cats = os.path.join(test_dir, "cats")
test_dogs = os.path.join(test_dir, "dogs")

# Number of files per label and per split.
train_cats_num, train_dogs_num, test_cats_num, test_dogs_num = (
    len(os.listdir(folder))
    for folder in (train_cats, train_dogs, test_cats, test_dogs)
)
train_all = train_cats_num + train_dogs_num
test_all = test_cats_num + test_dogs_num
print(train_all, test_all)
训练集有2000张,测试集1000张,这样的话训练集与测试集比例为2:1其实是不太好的,往往我们还是喜欢7/3开,不过也很接近了。
在构造数据生成器之前,或者说进行任何数据操作之前,我们必须要查看数据,对数据有一定了解。通过查看一些图片,发现每一张图片的大小并不一致,那么就要想一个比较好的大小来约束所有图片(在保证显存训练够用的情况下)。下面就是构造生成器的一些处理
- 设置batch_size
- 读取文件夹下的图片
- 为了降低计算量以及数据大小,先将图片RGB归一化,变为0~1之间
- 设置好图片的大小,打乱数据
- 设置好seed,保证可重现
- 设置分类指标(二分类)
batch_size = 64
height = 224
width = 224

# Options shared by the training and the test iterator: batch size, fixed
# seed, the size every image is resized to, and binary (0/1) labels.
_flow_options = dict(
    batch_size=batch_size,
    seed=0,
    target_size=(height, width),
    class_mode="binary",
)

# Pixel values are rescaled by 1/255; only the training iterator shuffles.
train_generator = ImageDataGenerator(rescale=1. / 255.).flow_from_directory(
    directory=train_dir,
    shuffle=True,
    **_flow_options,
)
test_generator = ImageDataGenerator(rescale=1. / 255.).flow_from_directory(
    directory=test_dir,
    shuffle=False,
    **_flow_options,
)
然后利用构造好的生成器随机可视化一些图片
# Pull one batch from each iterator for a visual sanity check.
sample_training_images, labels = next(train_generator)
sample_testing_images, test_labels = next(test_generator)

# class_indices maps class name -> index; invert it to index -> class name.
d = train_generator.class_indices
names = {index: name for name, index in d.items()}


def plotImages(images_arr, labels):
    """Show images on a 3x5 grid, each titled with its label index and name.

    Args:
        images_arr: Sequence of images accepted by ``imshow``; at most the
            first 15 are drawn because the grid has 15 cells.
        labels: Labels aligned with ``images_arr``; each one is looked up in
            the module-level ``names`` mapping.
    """
    _, grid = plt.subplots(3, 5, figsize=(10, 8))
    for ax, img, label in zip(grid.flatten(), images_arr, labels):
        ax.imshow(img)
        ax.set_title("类别为: " + str(int(label)) + " " + names[label])
        # Hide both axes so that only the picture and its title remain.
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
    plt.tight_layout()
    plt.show()


plotImages(sample_training_images[:15], labels[:15])
plotImages(sample_testing_images[:15], test_labels[:15])
1.3 模型构建与训练
一开始我满怀信心,打算构造一个复杂点的模型,直接搭建一个DenseNet
class ConvBlock(tf.keras.layers.Layer):
    """BatchNorm -> ReLU -> 3x3 'same' convolution, concatenated with its input.

    Args:
        num_channels: Number of filters of the convolution, i.e. how many
            channels this block appends to its input.
    """

    def __init__(self, num_channels):
        super(ConvBlock, self).__init__()
        self.bn = tf.keras.layers.BatchNormalization()
        self.relu = tf.keras.layers.ReLU()
        self.conv = tf.keras.layers.Conv2D(
            filters=num_channels, kernel_size=(3, 3), padding='same')
        self.listLayers = [self.bn, self.relu, self.conv]

    def call(self, x):
        """Apply the sub-layers to ``x`` and return ``concat([x, y])`` on the last axis."""
        y = x
        # Iterate over the list itself: a `.layers` attribute only exists on
        # the tracking wrapper Keras may substitute for list attributes, not
        # on a plain Python list.
        for layer in self.listLayers:
            y = layer(y)
        # NOTE(review): axis=-1 is the channel axis only for channels-last
        # inputs, which is what the (height, width, 3) images used here are.
        return tf.keras.layers.concatenate([x, y], axis=-1)


class DenseBlock(tf.keras.layers.Layer):
    """A chain of ``num_convs`` ConvBlocks.

    The output has ``num_convs * num_channels`` more channels than the input,
    since every ConvBlock appends ``num_channels`` channels.

    Args:
        num_convs: Number of ConvBlocks in the chain.
        num_channels: Channels appended by each ConvBlock.
    """

    def __init__(self, num_convs, num_channels):
        super(DenseBlock, self).__init__()
        self.listLayers = [ConvBlock(num_channels) for _ in range(num_convs)]

    def call(self, x):
        """Feed ``x`` through every ConvBlock in order."""
        # See ConvBlock.call for why the list is iterated directly.
        for layer in self.listLayers:
            x = layer(x)
        return x


class TransitionBlock(tf.keras.layers.Layer):
    """BatchNorm -> ReLU -> 1x1 convolution -> 2x2 average pooling (stride 2).

    Args:
        num_channels: Number of filters of the 1x1 convolution.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, num_channels, **kwargs):
        super(TransitionBlock, self).__init__(**kwargs)
        self.batch_norm = tf.keras.layers.BatchNormalization()
        self.relu = tf.keras.layers.ReLU()
        self.conv = tf.keras.layers.Conv2D(num_channels, kernel_size=1)
        self.avg_pool = tf.keras.layers.AvgPool2D(pool_size=2, strides=2)

    def call(self, x):
        """Apply normalization, activation, 1x1 convolution and pooling to ``x``."""
        x = self.batch_norm(x)
        x = self.relu(x)
        x = self.conv(x)
        return self.avg_pool(x)


def block_1():
    """Return the stem: 7x7 stride-2 conv (256 filters), BatchNorm, ReLU, 3x3 stride-2 max pool."""
    return tf.keras.Sequential([
        tf.keras.layers.Conv2D(256, kernel_size=7, strides=2, padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')])


def block_2():
    """Return the stem followed by four DenseBlocks separated by TransitionBlocks."""
    net = block_1()
    # num_channels tracks the current channel count (the stem outputs 256).
    num_channels, growth_rate = 256, 32
    num_convs_in_dense_blocks = [4, 4, 4, 4]
    last_index = len(num_convs_in_dense_blocks) - 1

    for i, num_convs in enumerate(num_convs_in_dense_blocks):
        net.add(DenseBlock(num_convs, growth_rate))
        # Channel count at the output of the dense block just added.
        num_channels += num_convs * growth_rate
        # Between dense blocks, a transition layer halves the channel count.
        if i != last_index:
            num_channels //= 2
            net.add(TransitionBlock(num_channels))
    return net


def DenseNet():
    """Return the full network: dense-block backbone plus a binary head.

    The head is BatchNorm -> global average pooling -> LeakyReLU(0.1) ->
    Flatten -> Dense(128) -> LeakyReLU(0.1) -> Dense(1, sigmoid), so the
    model outputs a probability rather than a logit.
    """
    net = block_2()
    net.add(tf.keras.layers.BatchNormalization())
    net.add(tf.keras.layers.GlobalAvgPool2D())
    net.add(tf.keras.layers.LeakyReLU(0.1))
    net.add(tf.keras.layers.Flatten())
    net.add(tf.keras.layers.Dense(128))
    net.add(tf.keras.layers.LeakyReLU(0.1))
    net.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    return net
训练试一下
num_epochs = 5
lr = 1e-4

# Instantiate the network.
densenet = DenseNet()
optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr)
# The last layer of DenseNet() is Dense(1, activation='sigmoid'), so the model
# already outputs probabilities. from_logits must therefore be False;
# from_logits=True would apply a second sigmoid inside the loss.
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)

densenet.build(input_shape=(None, height, width, 3))
densenet.summary()
densenet.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

history = densenet.fit(
    train_generator,
    epochs=num_epochs,
    validation_data=test_generator,
)
densenet.evaluate(test_generator)
def trainning_plot(history, num_epochs):
    """Plot training/validation accuracy and loss against the epoch index.

    Args:
        history: Object returned by ``fit``; its ``history`` dict must contain
            the keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
        num_epochs: Requested number of epochs. Kept for backward
            compatibility; the x axis is derived from the number of epochs
            actually recorded, so a run that stopped early still plots
            instead of failing on mismatched lengths.
    """
    curves = history.history
    x = range(len(curves['accuracy']))

    plt.figure()
    for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss'):
        plt.plot(x, curves[key], label=key)
    plt.legend()
    plt.xlabel("Epochs")
    plt.show()


trainning_plot(history, num_epochs)
从结果上看,模型的准确率和盲猜差不多,也就是说几乎什么也没有学到。一个可能的原因是训练集与测试集的划分比例不够理想,而且总体的训练数据相对较少。因此,为了扩充数据集,增强模型的鲁棒性和泛化能力,需要使用数据增强。
# Training-time augmentation: on top of the 1/255 rescaling, apply random
# rotation, horizontal/vertical shifts, shear, zoom and horizontal flips.
augmenter = ImageDataGenerator(
    rescale=1. / 255.,
    rotation_range=40,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Replace the plain training iterator with the augmented one; the flow
# options are the same as before.
train_generator = augmenter.flow_from_directory(
    directory=train_dir,
    target_size=(height, width),
    class_mode="binary",
    batch_size=batch_size,
    shuffle=True,
    seed=0,
)
再来训练试试
# Continue training the same `densenet` instance, now fed by the current
# `train_generator`, for 25 more epochs.
num_epochs = 25
history = densenet.fit(
    train_generator,
    validation_data=test_generator,
    epochs=num_epochs,
)
有了一定的提升,但是对于二分类问题这种准确率还是太低了,并且训练曲线动荡。思考一下准确率较低的原因,往往是因为模型太过复杂而数据量较小导致的训练无法收敛。在这种情况下,那可以试试使用较为简单的模型进行训练,试试在简单模型下是否能较为平稳地训练,搭建了一个最基本的CNN
from tensorflow.keras import layers, models, regularizers

# A plain CNN: five 3x3 conv + 2x2 max-pool stages (32, 64, 64, 128, 128
# filters), then a 512-unit dense layer with L2 regularization, dropout and a
# sigmoid output for the binary decision.
model = models.Sequential([
    # Input size follows the generators' target_size instead of repeating 224.
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(height, width, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu',
                 kernel_regularizer=regularizers.l2(0.001)),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid'),
])
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would emit a stray "None").
model.summary()

# The output is a sigmoid probability, hence the string loss
# 'binary_crossentropy'. Metrics are passed in the documented list form.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop when val_accuracy has not improved by at least 0.001 for 5 epochs.
history = model.fit(
    train_generator,
    epochs=30,
    validation_data=test_generator,
    callbacks=[EarlyStopping(monitor='val_accuracy', min_delta=0.001,
                             patience=5, verbose=1)],
)
这里引入了L2正则化、早停(EarlyStopping)、Dropout(随机失活)……希望能在简单模型上学习到比较好的参数。
训练过程中并没有触发早停,而是训练满了30个epoch,测试集上准确率提高到了81.6%,说明我们的猜想是对的(继续训练也许会继续提高准确率)。复杂的模型需要大量的数据来训练,而对于我们这少量的数据而言,简单的网络可能有更好的性能。既然如此,我们就可以利用迁移学习,将别人在大量数据集中训练好的模型拿过来直接用,这样也就避免了训练收敛困难的问题。
# Frozen DenseNet201 backbone with ImageNet weights and no classifier top.
backbone = tf.keras.applications.DenseNet201(
    weights='imagenet',
    include_top=False,
    input_shape=(height, width, 3),
)
backbone.trainable = False

# Only the small head on top of the backbone is trained.
transfer_model = Sequential()
transfer_model.add(backbone)
transfer_model.add(tf.keras.layers.GlobalAveragePooling2D())
transfer_model.add(Dense(512, activation='relu'))
transfer_model.add(Dense(1, activation='sigmoid'))
transfer_model.summary()

# Dynamic learning rate with exponential decay: with staircase=True the rate
# is multiplied by 0.96 once every 50 optimizer steps.
init_lr = 1e-4
lr = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=init_lr,
    decay_steps=50,
    decay_rate=0.96,
    staircase=True,
)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# The head ends in a sigmoid, so the loss receives probabilities, not logits;
# from_logits=True would apply a second sigmoid inside the loss.
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)

transfer_model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

# Stop when val_accuracy has not improved by at least 0.001 for 5 epochs.
history = transfer_model.fit(
    train_generator,
    epochs=60,
    validation_data=test_generator,
    callbacks=[EarlyStopping(monitor='val_accuracy', min_delta=0.001,
                             patience=5, verbose=1)],
)
最终效果堪称完美,测试集上准确率达到98.9%,那就利用这个模型随机预测一些图片看看
plt.figure(figsize=(10, 8))

# Take the class-name -> index dictionary and swap its keys and values.
d = test_generator.class_indices
label_names = dict(zip(d.values(), d.keys()))

# A shuffled iterator over the test folder, so the preview mixes both classes.
pre_generator = ImageDataGenerator(
    rescale=1. / 255.
).flow_from_directory(
    batch_size=batch_size,
    directory=test_dir,
    shuffle=True,
    seed=0,
    target_size=(height, width),
    class_mode="binary",
)
plt.suptitle("预测结果")

# Only the first batch is needed for the preview.
images, labels = next(pre_generator)
# The grid has 25 cells; guard against a batch that holds fewer images.
shown = min(25, len(images))
# One batched call replaces 25 single-image predict() calls.
predictions = transfer_model.predict(images[:shown])

for i in range(shown):
    plt.subplot(5, 5, i + 1)
    plt.imshow(images[i])
    # The model outputs one sigmoid probability per image; threshold at 0.5.
    predicted_class = 1 if predictions[i][0] >= 0.5 else 0
    plt.title(label_names[predicted_class])
    plt.axis("off")
plt.show()
看上去貌似都预测正确,再看看混淆矩阵
def plot_confusion_matrix(cm, classes, title='混淆矩阵'):
    """Draw a confusion matrix as an image with the cell values written on top.

    Args:
        cm: Square matrix indexed as ``cm[true][predicted]``, with one
            row/column per entry of ``classes``.
        classes: Class names used as tick labels on both axes.
        title: Figure title.

    Note:
        Calls ``np.set_printoptions(precision=2)``, which changes NumPy's
        global print settings.
    """
    plt.figure(figsize=(12, 8), dpi=100)
    np.set_printoptions(precision=2)

    # Write the value of every non-zero cell at its (column, row) position.
    size = len(classes)
    for row in range(size):
        for col in range(size):
            value = cm[row][col]
            if value > 0.001:
                plt.text(col, row, "%0.2f" % (value,), color='red',
                         fontsize=15, va='center', ha='center')

    plt.imshow(cm, interpolation='nearest')
    plt.title(title)
    tick_positions = np.arange(size)
    plt.xticks(tick_positions, classes, rotation=90)
    plt.yticks(tick_positions, classes)
    plt.ylabel('真实值')
    plt.xlabel('预测值')
    plt.show()


# predict_classes() is deprecated and absent from later TensorFlow releases;
# thresholding the sigmoid output at 0.5 gives the same class ids. The
# generator already batches the data, so no batch_size is passed.
test_probabilities = transfer_model.predict(test_generator)
test_predict = (test_probabilities > 0.5).astype("int32").ravel()

test_names = list(test_generator.class_indices)
# test_generator was built with shuffle=False, so `classes` is in the same
# order as the predictions.
test_true = test_generator.classes
matrix = confusion_matrix(test_true, test_predict)
plot_confusion_matrix(matrix, test_names)
那这么好的模型当然得保存下来,留着后面用
# Persist the trained transfer-learning model under tf_model/transfer_model.
transfer_model.save(filepath='tf_model/transfer_model')