2. PyTorch Version
The overall approach is the same as with TensorFlow; the differences mainly come down to how the two frameworks' APIs are used.
```python
import os
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils import data
from torchvision import transforms, datasets, models
import numpy as np
import time
import random
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
```
2.1 Loading the Data
Since the dataset was already downloaded in the TensorFlow part, here we only need to load it.
file_path="./cats_and_dogs_filtered/" train="train" test="validation" trans=transforms.Compose([ transforms.Resize((224,224)),#随机切割将图片大小变为(224,224) transforms.ToTensor(), # 归一化为0-1 ]) train_data=datasets.ImageFolder(os.path.join(file_path,train),trans) test_data=datasets.ImageFolder(os.path.join(file_path,test),trans) random_choice=random.sample([i for i in range(len(train_data))],25) plt.figure(figsize=(10,8)) plt.suptitle("训练集可视化") for i,j in enumerate(random_choice): ax = plt.subplot(5,5,i+1) plt.imshow(train_data[j][0].numpy().transpose((1,2,0))) plt.title("标签为: "+str(train_data[j][1])+" "+train_data.classes[train_data[j][1]]) plt.axis("off") plt.show() batch_size=64 train_loader=data.DataLoader(train_data,batch_size=batch_size,shuffle=True) test_loader=data.DataLoader(test_data,batch_size=batch_size,shuffle=False) 复制代码
There isn't much image preprocessing here, unlike the TensorFlow version with its whole series of augmentation operations; it's mainly resizing and normalization.
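If you did want augmentation comparable to the TensorFlow version, it could look roughly like the sketch below (reusing the imports and paths defined above; the specific flip/rotation/jitter choices are only illustrative, not what was actually used here):

```python
# Sketch only: a possible augmented pipeline for the training set
train_trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),       # random left-right flip
    transforms.RandomRotation(15),           # small random rotation
    transforms.ColorJitter(brightness=0.2),  # slight brightness jitter
    transforms.ToTensor(),
])
train_data_aug = datasets.ImageFolder(os.path.join(file_path, train), train_trans)
```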
2.2 Building and Training the Model
With the earlier experience in mind, we skip the fancy, over-complicated networks and go straight to a basic CNN.
```python
base_model = nn.Sequential(
    nn.Conv2d(3, 48, kernel_size=7, stride=4, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(48, 96, kernel_size=5, padding=2),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(96, 128, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(128, 128, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Flatten(),
    nn.Linear(4608, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(1024, 512),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(512, 2)
)

# initialize model parameters
for name, param in base_model.named_parameters():
    if 'weight' in name:
        nn.init.kaiming_normal_(param)
    elif 'bias' in name:
        nn.init.constant_(param, val=0)
```
The output shape of each layer can be inspected as follows.
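A quick sketch that pushes a dummy batch through the network to print each layer's output shape (and to confirm the 4608 features fed into the first Linear layer):

```python
# Sketch: inspect per-layer output shapes with a dummy (1, 3, 224, 224) batch
x = torch.rand(1, 3, 224, 224)
for layer in base_model:
    x = layer(x)
    print(f"{layer.__class__.__name__:<12} output shape: {tuple(x.shape)}")
```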
Now start training.
```python
epochs = 40
lr = 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(base_model.parameters(), lr=lr)

base_model = base_model.cuda()
base_model.train()
for epoch in range(epochs):
    # reset running statistics at the start of every epoch
    loss_ = 0.
    train_acc = 0.
    total = 0.
    for i, batch in enumerate(train_loader):
        inputs, train_labels = batch
        optimizer.zero_grad()
        outputs = base_model(inputs.cuda())
        _, predicts = torch.max(outputs.data, 1)
        train_acc += (predicts == train_labels.cuda()).sum().item()
        loss = criterion(outputs, train_labels.cuda())
        loss.backward()
        optimizer.step()
        loss_ += loss.item()
        total += train_labels.size(0)
    print(f"epoch: {epoch}, loss={loss_/total*batch_size}, acc={100*train_acc/total}%")
```
Training in PyTorch is a bit more verbose than in TensorFlow: after defining the loss function and optimizer yourself, you run the forward pass to compute the loss and gradients, then let the optimizer update the model parameters. Of course, PyTorch Lightning (built for newer PyTorch versions) already offers compile/fit-style convenience much like tf.keras. But my current GPU only supports PyTorch 1.2 while Lightning requires at least 1.3... so to keep using the GPU I'll stick with the plain training loop.
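Just for reference, a rough sketch of what the same loop might look like with Lightning (it needs PyTorch >= 1.3, so it isn't used in this post; the class name LitCatDog is made up for illustration):

```python
# Not used here (requires PyTorch >= 1.3); rough Lightning equivalent of the loop above
import pytorch_lightning as pl

class LitCatDog(pl.LightningModule):
    def __init__(self, net):
        super().__init__()
        self.net = net

    def training_step(self, batch, batch_idx):
        x, y = batch
        return nn.functional.cross_entropy(self.net(x), y)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-4)

# trainer = pl.Trainer(max_epochs=40, gpus=1)
# trainer.fit(LitCatDog(base_model), train_loader)
```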
Let's see how it does on the test set.
```python
def test(model, test_loader):
    model.eval()
    correct = 0
    test_predict = []
    with torch.no_grad():
        for idx, (t_data, t_target) in enumerate(test_loader):
            t_data, t_target = t_data.cuda(), t_target.cuda()
            pred = model(t_data)
            pred_class = pred.argmax(dim=1)
            test_predict.extend(pred_class.cpu())
            correct += (pred_class == t_target).sum().item()
    acc = correct / len(test_loader.dataset)
    print(f"测试集上准确率为: {acc*100}%")
    return test_predict

test_predict = test(base_model, test_loader)
```
```python
y_true = test_loader.dataset.targets
matrix = confusion_matrix(y_true, test_predict)

def plot_confusion_matrix(cm, classes, title='混淆矩阵'):
    plt.figure(figsize=(12, 8), dpi=100)
    np.set_printoptions(precision=2)

    # write the value of each cell onto the matrix
    ind_array = np.arange(len(classes))
    x, y = np.meshgrid(ind_array, ind_array)
    for x_val, y_val in zip(x.flatten(), y.flatten()):
        c = cm[y_val][x_val]
        if c > 0.001:
            plt.text(x_val, y_val, "%0.2f" % (c,), color='red', fontsize=15, va='center', ha='center')

    plt.imshow(cm, interpolation='nearest')
    plt.title(title)
    xlocations = np.array(range(len(classes)))
    plt.xticks(xlocations, classes, rotation=90)
    plt.yticks(xlocations, classes)
    plt.ylabel('真实值')
    plt.xlabel('预测值')
    plt.show()

plot_confusion_matrix(matrix, list(test_loader.dataset.class_to_idx))
```
The results are roughly the same as with TensorFlow, so let's try transfer learning next.
```python
transfer_model = models.densenet201(pretrained=True)
# freeze the pretrained feature extractor
for param in transfer_model.parameters():
    param.requires_grad = False
# replace the classifier head with our own
transfer_model.classifier = nn.Sequential(
    nn.Linear(1920, 512),
    nn.LeakyReLU(0.1),
    nn.Linear(512, 128),
    nn.Dropout(0.5),
    nn.Linear(128, 2)
)
transfer_model = transfer_model.cuda()

optimizer = torch.optim.Adam(transfer_model.parameters(), lr=lr)
epochs = 10
transfer_model.train()
for epoch in range(epochs):
    loss_ = 0.
    train_acc = 0.
    total = 0.
    for i, batch in enumerate(train_loader):
        inputs, train_labels = batch
        optimizer.zero_grad()
        outputs = transfer_model(inputs.cuda())
        _, predicts = torch.max(outputs.data, 1)
        train_acc += torch.sum(predicts == train_labels.cuda()).item()
        loss = criterion(outputs, train_labels.cuda())
        loss.backward()
        optimizer.step()
        loss_ += loss.item()
        total += train_labels.size(0)
    print(f"epoch: {epoch}, loss={loss_/total*batch_size}, acc={100*train_acc/total}%")
```
For transfer learning we only need to replace the classifier layer with one that fits our task; the pretrained layers in front stay untouched and do not participate in training.
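As an optional sanity check (not part of the original run), you can confirm that only the new classifier contributes trainable parameters:

```python
# Only the replaced classifier should have requires_grad=True
trainable = sum(p.numel() for p in transfer_model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in transfer_model.parameters())
print(f"trainable params: {trainable} / total params: {total_params}")
```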
Test accuracy again reaches 98.6%, about the same as the TensorFlow result. Finally, save the model.
```python
torch.save(transfer_model, './torch_model/transfer_model.pkl')
```
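Since the whole model object is saved here (not just a state_dict), loading it back for inference later is a one-liner; a minimal sketch, assuming the same torchvision version is available when loading:

```python
# Sketch: reload the saved model for inference
restored = torch.load('./torch_model/transfer_model.pkl')  # add map_location='cpu' on a CPU-only machine
restored.eval()
```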
3. Building an Image Classification Service
After all the parameter tuning, network tweaks, and different approaches, we finally have a well-performing model, so let's put it to good use. I previously wrote a FastAPI series that ended with an sklearn demo; today's service is similar: use the locally saved model to classify uploaded images.
```python
# -*- coding: utf8 -*-
from PIL import Image
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.requests import Request
from fastapi.responses import RedirectResponse
from io import BytesIO
import tensorflow as tf
import uvicorn
import numpy as np
from typing import Optional, List
from starlette.templating import Jinja2Templates

tmp = Jinja2Templates(directory='templates')


class Model:
    model: Optional[tf.keras.Model] = None

    def load_model(self):
        # load the transfer-learning model saved in the TensorFlow part
        self.model = tf.keras.models.load_model("./tf_model/transfer_model")

    def predict(self, input_image):
        output = self.model.predict_classes(input_image).item()
        mapping = {0: 'cat', 1: 'dog'}
        return mapping[output]


def read_convert_image(file):
    # decode the uploaded bytes, resize to the model's input size, scale to [0, 1]
    loaded_image = Image.open(BytesIO(file))
    image_to_convert = np.asarray(loaded_image.resize((224, 224)))[..., :3]
    image_to_convert = np.expand_dims(image_to_convert, 0)
    image_to_convert = image_to_convert / 255.0
    return np.float32(image_to_convert)


describe = '''
<h2>访问/predict/image路由去尝试用训练好的模型对猫狗图片进行分类预测</h2>
'''
app = FastAPI(description=describe)
mymodel = Model()


@app.get("/predict/image")
def index(request: Request):
    return tmp.TemplateResponse('predict.html', {'request': request})


@app.post("/predict/image")
async def image(request: Request, image_to_predict: UploadFile = File(...)):
    if image_to_predict is None or image_to_predict.file is None:
        raise HTTPException(status_code=400, detail="Please provide an image when calling this request")
    extension = image_to_predict.filename.split(".")[-1] in ("jpg", "jpeg", "png")
    if not extension:
        raise HTTPException(status_code=400, detail="Please provide a jpg or png image")

    img = image_to_predict.filename
    image_data = read_convert_image(image_to_predict.file.read())
    prediction = mymodel.predict(image_data)
    return tmp.TemplateResponse('result.html', {
        'request': request,
        'img': img,
        'prediction': prediction
    })


@app.get('/')
async def hello():
    return RedirectResponse("/docs")


@app.on_event("startup")
async def startup():
    # load the model once at startup so requests don't pay the loading cost
    mymodel.load_model()


if __name__ == "__main__":
    uvicorn.run("app:app", port=8000)
```
As before, the model is loaded when the app starts up; images are then uploaded via a form, predicted, and the result is returned. My front-end skills are admittedly weak, though. I wanted to do simple page rendering with plain HTML, but some features just wouldn't work well, so I could only put together a rather bare-bones page.
Since loading the model takes quite a while, it's best not to enable reload. Next time I'll give TensorFlow Serving a try.
result.html
```html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>猫狗大战预测</title>
</head>
<body>
    <h1>传入图片名称为:{{img}}</h1>
    <h1>预测结果为:{{prediction}}</h1>
    <a href="/predict/image"><strong>返回继续</strong></a>
</body>
</html>
```
predict.html
```html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>猫狗大战预测</title>
</head>
<body>
    <h1>上传一个猫/狗图片进行分类预测</h1>
    <form action="/predict/image/" enctype="multipart/form-data" onchange="changepic(this)" method="post">
        <input type="file" id="file" name="image_to_predict" accept="image/*">
        <input type="submit" value="预测">
    </form>
    <img src="" id="show" width="200">
</body>
<script>
    function changepic() {
        var reads = new FileReader();
        f = document.getElementById('file').files[0];
        reads.readAsDataURL(f);
        reads.onload = function (e) {
            document.getElementById('show').src = this.result;
        };
    }
</script>
</html>
```
4. Final Result
To check it out, let's grab a few images from the web and try them.
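With the service running locally, a downloaded picture can also be posted to the endpoint from a small script; a minimal sketch, where test_dog.jpg is just a placeholder file name:

```python
# Sketch: exercise the running service with a downloaded image
import requests

with open("test_dog.jpg", "rb") as f:
    resp = requests.post(
        "http://127.0.0.1:8000/predict/image",
        files={"image_to_predict": ("test_dog.jpg", f, "image/jpeg")},
    )
print(resp.status_code)  # 200 if the upload was accepted
print(resp.text)         # rendered result.html containing the prediction
```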