首先导入必要的包
paddle --------> PaddlePaddle 深度学习框架(本教程使用 Paddle 2.0 的 paddle 包,而非旧版 paddle.fluid)
os ------------> Python 标准库模块,可用于与操作系统交互
# # 导入需要的包 import paddle import numpy as np from PIL import Image import sys from multiprocessing import cpu_count import matplotlib.pyplot as plt import os from paddle.nn import MaxPool2D,Conv2D,BatchNorm2D from paddle.nn import Linear print("本教程基于Paddle的版本号为:"+paddle.__version__)
本教程基于Paddle的版本号为:2.0.0 /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/matplotlib/__init__.py:107: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working from collections import MutableMapping /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/matplotlib/rcsetup.py:20: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working from collections import Iterable, Mapping /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/matplotlib/colors.py:53: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working from collections import Sized
# 同时添加如下代码, 这样每次环境(kernel)启动的时候只要运行下方代码即可: # Also add the following code, # so that every time the environment (kernel) starts, # just run the following code: import sys sys.path.append('/home/aistudio/external-libraries')
Step1:准备数据
(1)数据集介绍
我们使用CIFAR10数据集。CIFAR10数据集包含60,000张32x32的彩色图片,共10个类别,每个类别包含6,000张。其中50,000张图片作为训练集,10,000张作为验证集。本教程将对全部10个类别进行分类预测。
(2)train_reader和test_reader
自定义读取器处理训练集和测试集
paddle.reader.shuffle()表示每次缓存BUF_SIZE个数据项,并进行打乱
paddle.batch()表示每BATCH_SIZE个数据项组成一个batch(Paddle 2.0 中也可改用 paddle.io.Dataset 与 DataLoader 实现同样功能)
#!tar -zxvf /home/aistudio/data/data9154/cifar-10-python.tar.gz
def unpickle(file):
    """Load one pickled CIFAR-10 batch file and return its contents.

    Parameters
    ----------
    file : str
        Path to a CIFAR-10 batch file (pickled under Python 2, hence the
        ``encoding='bytes'`` and bytes dictionary keys).

    Returns
    -------
    dict
        Batch contents with bytes keys such as b'data', b'labels',
        b'batch_label' and b'filenames'.
    """
    import pickle
    with open(file, 'rb') as fo:
        # Renamed from `dict` to avoid shadowing the builtin.
        batch = pickle.load(fo, encoding='bytes')
    return batch


# Guarded so importing this module does not trigger file I/O; behavior is
# unchanged when the cell/script runs as __main__.
if __name__ == "__main__":
    print(unpickle("cifar-10-batches-py/data_batch_1").keys())
    print(unpickle("cifar-10-batches-py/test_batch").keys())
dict_keys([b'batch_label', b'labels', b'data', b'filenames']) dict_keys([b'batch_label', b'labels', b'data', b'filenames'])
def test_mapper(sample): img, label = sample #将img数组进行进行归一化处理,得到0到1之间的数值 img= img.flatten().astype('float32')/255.0 return img, label
def train_mapper(sample):
    """Normalize one (image, label) training sample.

    The image array is flattened and rescaled from [0, 255] to [0, 1]
    as float32; the label is returned as-is.
    """
    raw_image, label = sample
    scaled = raw_image.flatten().astype('float32') / 255.0
    return scaled, label
def train_r(buffered_size=1024):
    """Build a training-data reader over CIFAR-10 batches 1-5.

    Parameters
    ----------
    buffered_size : int
        Buffer size passed to ``paddle.reader.xmap_readers``.

    Returns
    -------
    callable
        A reader whose samples have been normalized by ``train_mapper``
        in parallel worker threads.
    """
    def reader():
        xs = []
        ys = []
        # Concatenate all five training batches into one stream.
        for i in range(1, 6):
            train_dict = unpickle("cifar-10-batches-py/data_batch_%d" % (i,))
            xs.append(train_dict[b'data'])
            ys.append(train_dict[b'labels'])
        Xtr = np.concatenate(xs)
        Ytr = np.concatenate(ys)
        for (x, y) in zip(Xtr, Ytr):
            yield x, int(y)
    # BUG FIX: the original returned the bare argument tuple
    # (train_mapper, reader, cpu_count(), buffered_size) instead of a reader,
    # so train_mapper was never applied. Mirror test_r() by wrapping with
    # xmap_readers.
    return paddle.reader.xmap_readers(train_mapper, reader, cpu_count(), buffered_size)
def test_r(buffered_size=1024):
    """Build an evaluation-data reader over the CIFAR-10 test batch.

    Samples are normalized by ``test_mapper`` in parallel worker threads
    via ``paddle.reader.xmap_readers``.
    """
    def reader():
        batch = unpickle("cifar-10-batches-py/test_batch")
        for image, label in zip(batch[b'data'], batch[b'labels']):
            yield image, int(label)
    return paddle.reader.xmap_readers(test_mapper, reader, cpu_count(), buffered_size)
'''
Custom dataset definition.
'''
import paddle
from paddle.io import Dataset


class MyDataset(paddle.io.Dataset):
    """Step 1: subclass paddle.io.Dataset.

    CIFAR-10 dataset whose images are stored as flattened float32
    vectors scaled to [0, 1], with int64 labels.
    """

    def __init__(self, mode='train'):
        """Step 2: constructor — load the split selected by ``mode``.

        ``mode == 'train'`` loads training batches 1-5 (50,000 samples);
        any other value loads the 10,000-sample test batch.
        """
        super(MyDataset, self).__init__()
        self.data = []   # list of flattened float32 images in [0, 1]
        self.label = []  # list of int64 numpy scalars
        if mode == 'train':
            xs, ys = [], []
            for i in range(1, 6):
                train_dict = unpickle("cifar-10-batches-py/data_batch_%d" % (i,))
                xs.append(train_dict[b'data'])
                ys.append(train_dict[b'labels'])
            self._append_samples(np.concatenate(xs), np.concatenate(ys))
        else:
            test_dict = unpickle("cifar-10-batches-py/test_batch")
            self._append_samples(test_dict[b'data'], test_dict[b'labels'])

    def _append_samples(self, images, labels):
        """Normalize each image to a flat float32 [0, 1] vector and store it
        with its int64 label. Shared by both the train and test branches
        (previously duplicated)."""
        for (x, y) in zip(images, labels):
            self.data.append(x.flatten().astype('float32') / 255.0)
            self.label.append(np.array(y).astype('int64'))

    def __getitem__(self, index):
        """Step 3: return one (data, label) pair for the given index."""
        data = self.data[index]
        label = self.label[index]
        return data, np.array(label, dtype='int64')

    def __len__(self):
        """Step 4: return the total number of samples."""
        return len(self.data)


# Sanity-check the dataset definition.
train_dataset = MyDataset(mode='train')
eval_dataset = MyDataset(mode='val')
print('=============train_dataset =============')
print(train_dataset.__getitem__(1)[0].shape,train_dataset.__getitem__(1)[1])
print(train_dataset.__len__())
print('=============eval_dataset =============')
for data, label in eval_dataset:
    print(data.shape, label)
    break
print(eval_dataset.__len__())
=============train_dataset ============= (3072,) 9 50000 =============eval_dataset ============= (3072,) 3 10000
Step2.网络配置
(1)网络搭建
*** CNN网络模型
在CNN模型中,卷积神经网络能够更好的利用图像的结构信息。下面定义了一个较简单的卷积神经网络。显示了其结构:输入的二维图像,先经过三次卷积层、池化层和Batchnorm,再经过全连接层,最后使用softmax分类作为输出层。
池化是非线性下采样的一种形式,主要作用是通过减少网络的参数来减小计算量,并且能够在一定程度上控制过拟合。通常在卷积层的后面会加上一个池化层。paddlepaddle池化默认为最大池化。是用不重叠的矩形框将输入层分成不同的区域,对于每个矩形框的数取最大值作为输出
Batchnorm顾名思义是对每个batch的数据同时做一次norm。其作用是在深度神经网络训练过程中使每一层网络的输入保持相同分布。
# Define the CNN network.
class MyCNN(paddle.nn.Layer):
    """Three-conv CNN for 32x32x3 CIFAR-10 images.

    Layout: (conv -> relu -> maxpool -> batchnorm) x2, then
    conv -> relu -> maxpool, flatten, and a linear layer producing
    10-class logits.
    """

    def __init__(self):
        super(MyCNN, self).__init__()
        # 32x32x3 -> 28x28x20 -> pooled 14x14x20
        self.conv0 = paddle.nn.Conv2D(in_channels=3, out_channels=20, kernel_size=5, padding=0)
        self.pool0 = paddle.nn.MaxPool2D(kernel_size=2, stride=2)
        self._batch_norm_0 = paddle.nn.BatchNorm2D(num_features=20)

        # 14x14x20 -> 10x10x50 -> pooled 5x5x50
        self.conv1 = paddle.nn.Conv2D(in_channels=20, out_channels=50, kernel_size=5, padding=0)
        self.pool1 = paddle.nn.MaxPool2D(kernel_size=2, stride=2)
        self._batch_norm_1 = paddle.nn.BatchNorm2D(num_features=50)

        # 5x5x50 -> 1x1x50 -> pooled 1x1x50
        self.conv2 = paddle.nn.Conv2D(in_channels=50, out_channels=50, kernel_size=5, padding=0)
        self.pool2 = paddle.nn.MaxPool2D(kernel_size=2, stride=2)
        self.fc1 = paddle.nn.Linear(in_features=50, out_features=10)

    def forward(self, input):
        """Return class logits of shape [N, 10].

        Accepts flattened [N, 3072] input and reshapes it to NCHW.
        """
        input = paddle.reshape(input, shape=[-1, 3, 32, 32])
        x = self.conv0(input)
        x = paddle.nn.functional.relu(x)
        x = self.pool0(x)
        x = self._batch_norm_0(x)

        x = self.conv1(x)
        x = paddle.nn.functional.relu(x)
        x = self.pool1(x)
        x = self._batch_norm_1(x)

        x = self.conv2(x)
        x = paddle.nn.functional.relu(x)
        x = self.pool2(x)

        x = paddle.reshape(x, [x.shape[0], -1])
        # BUG FIX: return raw logits instead of applying softmax here.
        # paddle.nn.CrossEntropyLoss applies softmax internally, so the
        # previous extra softmax double-normalized the output and capped
        # the reported cross-entropy near ~1.46 even at high accuracy
        # (matching the eval loss of 1.4612 seen in the log).
        # argmax-based predictions are unaffected by this change.
        return self.fc1(x)
Step3.模型训练 and Step4.模型评估
#step3:训练模型 # 用Model封装模型 model = paddle.Model(MyCNN()) model.summary(input_size=(1,3, 32, 32))
--------------------------------------------------------------------------- Layer (type) Input Shape Output Shape Param # =========================================================================== Conv2D-1 [[1, 3, 32, 32]] [1, 20, 28, 28] 1,520 MaxPool2D-1 [[1, 20, 28, 28]] [1, 20, 14, 14] 0 BatchNorm2D-1 [[1, 20, 14, 14]] [1, 20, 14, 14] 80 Conv2D-2 [[1, 20, 14, 14]] [1, 50, 10, 10] 25,050 MaxPool2D-2 [[1, 50, 10, 10]] [1, 50, 5, 5] 0 BatchNorm2D-2 [[1, 50, 5, 5]] [1, 50, 5, 5] 200 Conv2D-3 [[1, 50, 5, 5]] [1, 50, 1, 1] 62,550 MaxPool2D-3 [[1, 50, 1, 1]] [1, 50, 1, 1] 0 Linear-1 [[1, 50]] [1, 10] 510 =========================================================================== Total params: 89,910 Trainable params: 89,630 Non-trainable params: 280 --------------------------------------------------------------------------- Input size (MB): 0.01 Forward/backward pass size (MB): 0.24 Params size (MB): 0.34 Estimated Total Size (MB): 0.59 --------------------------------------------------------------------------- {'total_params': 89910, 'trainable_params': 89630}
model.prepare(paddle.optimizer.Adam(learning_rate=0.0005, parameters=model.parameters()), paddle.nn.CrossEntropyLoss(), paddle.metric.Accuracy()) # 训练可视化VisualDL工具的回调函数 visualdl = paddle.callbacks.VisualDL(log_dir='visualdl_log') # 启动模型全流程训练 model.fit(train_dataset, epochs=50, batch_size=256, verbose=1) #保存模型 model.save('model_save_dir')
The loss value printed in the log is the current step, and the metric is the average value of previous step. Epoch 1/50 step 20/196 [==>...........................] - loss: 2.1580 - acc: 0.2246 - ETA: 3s - 18ms/st /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/layers/utils.py:77: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working return (isinstance(seq, collections.Sequence) and /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/nn/layer/norm.py:636: UserWarning: When training, we now always track global mean and variance. "When training, we now always track global mean and variance.") step 40/196 [=====>........................] - loss: 2.1560 - acc: 0.2699 - ETA: 1s - 13ms/stepstep 196/196 [==============================] - loss: 1.9848 - acc: 0.4067 - 9ms/step Epoch 2/50 step 196/196 [==============================] - loss: 1.8542 - acc: 0.5571 - 8ms/step Epoch 3/50 step 196/196 [==============================] - loss: 1.9660 - acc: 0.6163 - 8ms/step Epoch 4/50 step 196/196 [==============================] - loss: 1.8622 - acc: 0.6516 - 9ms/step Epoch 5/50 step 196/196 [==============================] - loss: 1.7989 - acc: 0.6803 - 7ms/step Epoch 6/50 step 196/196 [==============================] - loss: 1.7298 - acc: 0.7024 - 8ms/step Epoch 7/50 step 196/196 [==============================] - loss: 1.7754 - acc: 0.7196 - 8ms/step Epoch 8/50 step 196/196 [==============================] - loss: 1.6815 - acc: 0.7379 - 8ms/step Epoch 9/50 step 196/196 [==============================] - loss: 1.8101 - acc: 0.7509 - 8ms/step Epoch 10/50 step 196/196 [==============================] - loss: 1.6813 - acc: 0.7647 - 7ms/step Epoch 11/50 step 196/196 [==============================] - loss: 1.6892 - acc: 0.7755 - 8ms/step Epoch 12/50 step 196/196 [==============================] - loss: 1.7464 - acc: 0.7867 - 8ms/step 
Epoch 13/50 step 196/196 [==============================] - loss: 1.6684 - acc: 0.7973 - 8ms/step Epoch 14/50 step 196/196 [==============================] - loss: 1.6570 - acc: 0.8043 - 8ms/step Epoch 15/50 step 196/196 [==============================] - loss: 1.7619 - acc: 0.8107 - 8ms/step Epoch 16/50 step 196/196 [==============================] - loss: 1.6384 - acc: 0.8200 - 8ms/step Epoch 17/50 step 196/196 [==============================] - loss: 1.6496 - acc: 0.8268 - 9ms/step Epoch 18/50 step 196/196 [==============================] - loss: 1.5964 - acc: 0.8312 - 11ms/step Epoch 19/50 step 196/196 [==============================] - loss: 1.6590 - acc: 0.8370 - 10ms/step Epoch 20/50 step 196/196 [==============================] - loss: 1.6211 - acc: 0.8415 - 10ms/step Epoch 21/50 step 196/196 [==============================] - loss: 1.6676 - acc: 0.8439 - 9ms/step Epoch 22/50 step 196/196 [==============================] - loss: 1.6594 - acc: 0.8490 - 10ms/step Epoch 23/50 step 196/196 [==============================] - loss: 1.5433 - acc: 0.8521 - 10ms/step Epoch 24/50 step 196/196 [==============================] - loss: 1.5985 - acc: 0.8551 - 10ms/step Epoch 25/50 step 196/196 [==============================] - loss: 1.5646 - acc: 0.8580 - 10ms/step Epoch 26/50 step 196/196 [==============================] - loss: 1.5897 - acc: 0.8608 - 10ms/step Epoch 27/50 step 196/196 [==============================] - loss: 1.5450 - acc: 0.8625 - 10ms/step Epoch 28/50 step 196/196 [==============================] - loss: 1.5929 - acc: 0.8646 - 8ms/step Epoch 29/50 step 196/196 [==============================] - loss: 1.6742 - acc: 0.8669 - 9ms/step Epoch 30/50 step 196/196 [==============================] - loss: 1.5607 - acc: 0.8684 - 9ms/step Epoch 31/50 step 196/196 [==============================] - loss: 1.6057 - acc: 0.8709 - 11ms/step Epoch 32/50 step 196/196 [==============================] - loss: 1.6948 - acc: 0.8716 - 10ms/step Epoch 33/50 step 196/196 
[==============================] - loss: 1.5785 - acc: 0.8732 - 9ms/step Epoch 34/50 step 196/196 [==============================] - loss: 1.5547 - acc: 0.8751 - 8ms/step Epoch 35/50 step 196/196 [==============================] - loss: 1.5902 - acc: 0.8768 - 8ms/step Epoch 36/50 step 196/196 [==============================] - loss: 1.5661 - acc: 0.8787 - 8ms/step Epoch 37/50 step 196/196 [==============================] - loss: 1.6124 - acc: 0.8780 - 9ms/step Epoch 38/50 step 196/196 [==============================] - loss: 1.6125 - acc: 0.8801 - 10ms/step Epoch 39/50 step 196/196 [==============================] - loss: 1.6456 - acc: 0.8791 - 9ms/step Epoch 40/50 step 196/196 [==============================] - loss: 1.6018 - acc: 0.8816 - 10ms/step Epoch 41/50 step 196/196 [==============================] - loss: 1.6161 - acc: 0.8837 - 9ms/step Epoch 42/50 step 196/196 [==============================] - loss: 1.5863 - acc: 0.8841 - 8ms/step Epoch 43/50 step 196/196 [==============================] - loss: 1.6033 - acc: 0.8847 - 8ms/step Epoch 44/50 step 196/196 [==============================] - loss: 1.5980 - acc: 0.8858 - 8ms/step Epoch 45/50 step 196/196 [==============================] - loss: 1.5921 - acc: 0.8851 - 10ms/step Epoch 46/50 step 196/196 [==============================] - loss: 1.5371 - acc: 0.8866 - 10ms/step Epoch 47/50 step 196/196 [==============================] - loss: 1.5950 - acc: 0.8879 - 8ms/step Epoch 48/50 step 196/196 [==============================] - loss: 1.5795 - acc: 0.8870 - 8ms/step Epoch 49/50 step 196/196 [==============================] - loss: 1.5749 - acc: 0.8880 - 7ms/step Epoch 50/50 step 196/196 [==============================] - loss: 1.5721 - acc: 0.8894 - 8ms/step
从训练日志可以看到,训练集 acc 稳步上升,50 轮后达到约 0.89(第 10 轮约为 0.76)。
模型存储
将我们训练得到的模型进行保存,以便后续评估和测试使用。
# Save the trained weights for later evaluation and testing.
# NOTE(review): the checkpoint name 'sheep_model' looks copied from another
# tutorial (the model above was saved as 'model_save_dir'); consider a
# CIFAR-specific name — confirm before reuse.
model.save('sheep_model')
Model 评估和测试
批量预测测试
评估
评估后测试集准确率约为 0.6668(见下方输出),明显低于训练集准确率,说明模型存在较严重的过拟合。
# plot the evaluate model.evaluate(eval_dataset,verbose=1)
Eval begin... The loss value printed in the log is the current batch, and the metric is the average value of previous step. step 10000/10000 [==============================] - loss: 1.4612 - acc: 0.6668 - 3ms/step Eval samples: 10000 {'loss': [1.4611502], 'acc': 0.6668}
预测
对eval_dataset数据进行预测
# Report the size of the evaluation set.
print('测试数据集样本量:{}'.format(len(eval_dataset)))
测试数据集样本量:10000
# 执行预测 result = model.predict(eval_dataset)
Predict begin... step 10000/10000 [==============================] - 3ms/step Predict samples: 10000
# Show the first 10 predictions alongside their ground-truth labels.
for sample_id in range(10):
    pred = str(np.argmax(result[0][sample_id]))
    truth = str(eval_dataset[sample_id][1])
    print('样本ID:{}, 真实标签:{}, 预测值:{}'.format(sample_id, truth, pred))
样本ID:0, 真实标签:3, 预测值:9 样本ID:1, 真实标签:8, 预测值:8 样本ID:2, 真实标签:8, 预测值:8 样本ID:3, 真实标签:0, 预测值:0 样本ID:4, 真实标签:6, 预测值:4 样本ID:5, 真实标签:6, 预测值:6 样本ID:6, 真实标签:1, 预测值:1 样本ID:7, 真实标签:6, 预测值:6 样本ID:8, 真实标签:3, 预测值:3 样本ID:9, 真实标签:1, 预测值:1