通过分析yaml文件,并将训练阶段的yolov7结构转onnx可视化以后,用Visio画了一下主干和SPPCSPC的结构图,其他结构部分后面有时间了再细画。【注意这里强调了是训练阶段,不是预测阶段,预测阶段的结构图和训练是不一样的,因为预测阶段采用了重参化结构,可以看我另一篇将RepVGG重构的文章】
根据项目中 yolov7-main\cfg\training\yolov7.yaml绘制主干结构。下面的注释内容是每次for循环得到的输出通道列表
backbone: # [from, number, module, args] [[-1, 1, Conv, [32, 3, 1]], # 0 conv1(3,32,3,s=1) [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 conv2(32,64,3,s=2) [-1, 1, Conv, [64, 3, 1]], # conv3(64,64,3,1) [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 conv4(64,128,k=3,s=2) [-1, 1, Conv, [64, 1, 1]], # conv5(128,64,1,1) 此刻的out_channels=[32,64,64,128,64]* [-2, 1, Conv, [64, 1, 1]], # conv6(128,64,1,1)* 这个卷积层就是一个1*1的identity [-1, 1, Conv, [64, 3, 1]], # conv7(64,64,3,1) [-1, 1, Conv, [64, 3, 1]], # conv8(64,64,3,1)* [-1, 1, Conv, [64, 3, 1]], # conv9(64,64,3,1) [-1, 1, Conv, [64, 3, 1]], # conv10(64,64,3,1) 此刻的output_channels = [32,64,64,128,64,64,64,64,64,64]* [[-1, -3, -5, -6], 1, Concat, [1]], # 取出通道[64,64,64,64], 拼接后256通道 output_channels = [32,64,64,128,64,64,64,64,64,64,256] [-1, 1, Conv, [256, 1, 1]], # 11 conv11(256,256,1,1) output_channels = [32,64,64,128,64,64,64,64,64,64,256,256] [-1, 1, MP, []], # maxpooling(k=2,s=2)通道数还是为256 [-1, 1, Conv, [128, 1, 1]], # conv12(256,128,1,1) output_channels = [32,64,64,128,64,64,64,64,64,64,256,256,256,128]* [-3, 1, Conv, [128, 1, 1]], # conv13(256,128,1,1) [32,64,64,128,64,64,64,64,64,64,256,256,256,128,128] [-1, 1, Conv, [128, 3, 2]], # conv14(128,128,3,2) [32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128]* [[-1, -3], 1, Concat, [1]], # 16-P3/8 输出256 [32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256] [-1, 1, Conv, [128, 1, 1]], # conv15(256,128,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128]* [-2, 1, Conv, [128, 1, 1]], # conv16(256,128,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128]* [-1, 1, Conv, [128, 3, 1]], # conv17(128,128,3,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128] [-1, 1, Conv, [128, 3, 1]], # conv18(128,128,3,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128]* [-1, 1, Conv, [128, 3, 1]], # conv19(128,128,3,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128] [-1, 1, Conv, [128, 3, 1]], # conv20(128,128,3,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128]* [[-1, -3, -5, -6], 1, Concat, [1]],# 512 [32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512] [-1, 1, Conv, [512, 1, 1]], # 24 conv21(512,512,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512] [-1, 1, MP, []],# [32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512] [-1, 1, Conv, [256, 1, 1]],# conv22(512,256,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256]* [-3, 1, Conv, [256, 1, 1]], # conv23(512,256,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256] [-1, 1, Conv, [256, 3, 2]], # conv24(256,256,3,2)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256]* [[-1, -3], 1, Concat, [1]], # 29-P4/16 512[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512] [-1, 1, Conv, [256, 1, 1]],# conv25(512,256,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256]* [-2, 1, Conv, [256, 1, 1]],# conv26(512,256,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256]* [-1, 1, Conv, [256, 3, 1]],# conv27(256,256,3,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256] [-1, 1, Conv, [256, 3, 1]],# conv28(256,256,3,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256]* [-1, 1, Conv, [256, 3, 1]],# conv29(256,256,3,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256] [-1, 1, Conv, [256, 3, 1]],# conv30(256,256,3,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256]* [[-1, -3, -5, -6], 1, Concat, [1]],# 1024[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024] [-1, 1, Conv, [1024, 1, 1]], # 37 conv31(1024,1024,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024] [-1, 1, MP, []],# 1024 maxpooling(2,2)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024] [-1, 1, Conv, [512, 1, 1]],# conv32(1024,512,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024,512]* [-3, 1, Conv, [512, 1, 1]],# conv33(1024,512,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024,512,512] [-1, 1, Conv, [512, 3, 2]],# conv34(512,512,3,2)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024,512,512,512]* [[-1, -3], 1, Concat, [1]], # 42-P5/32 1024 [32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024,512,512,512,1024] [-1, 1, Conv, [256, 1, 1]],# conv35(1024,256,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024,512,512,512,1024,256]* [-2, 1, Conv, [256, 1, 1]],# conv36(1024,256,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024,512,512,512,1024,256,256]* [-1, 1, Conv, [256, 3, 1]],# conv37(256,256,3,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024,512,512,512,1024,256,256,256] [-1, 1, Conv, [256, 3, 1]],# conv38(256,256,3,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024,512,512,512,1024,256,256,256,256]* [-1, 1, Conv, [256, 3, 1]],# conv39(256,256,3,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024,512,512,512,1024,256,256,256,256,256] [-1, 1, Conv, [256, 3, 1]],# conv40(256,256,3,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024,512,512,512,1024,256,256,256,256,256,256]* [[-1, -3, -5, -6], 1, Concat, [1]],# 1024[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024,512,512,512,1024,256,256,256,256,256,256,1024] [-1, 1, Conv, [1024, 1, 1]], # 50conv41(1024,1024,1,1)[32,64,64,128,64,64,64,64,64,64,256,256,256,128,128,128,256,128,128,128,128,128,128,512,512,512,256,256,256,512,256,256,256,256,256,256,1024,1024,1024,512,512,512,1024,256,256,256,256,256,256,1024,1024] ]
网络结构图如下(这可是我一点点看着yaml文件和onnx自己画的哦):可以看到在每个stage中,均有一个1 * 1大小的identity,就和ResNet是类似的,会会将4个部分进行拼接(通道层次上的拼接),因此拼接后的通道数会变成4倍。同时每次concat后又会经过一个K=1,S=1的卷积,再分别有两个分支,其中一个分支是经过MaxPooling,另一个分支就是正常的卷积,再将两个分支进行拼接。
SSPCSPC网络结构如下:可以看到该结构和SPP是有些相似的,都有1*1 5*5 9*9 13*13的池化层,同时与v3和v4一样,前后也均有卷积,不同的是有一个1*1的残差边。
这里我将SPPCSPC的onnx结构图也附上。
从onnx图上可以看到有个sigmoid和conv的相乘,其实这个过程就是SiLU激活函数的过程,只是因为转onnx的时候不支持SiLU激活函数,因此需要转化一下。大家就把那部分看为SiLU就可以啦。
转onnx代码如下:
yaml_file = '../cfg/training/yolov7.yaml' model = Model(yaml_file) x = torch.ones(1,3,640,640) torch.onnx.export(model, x, 'yolov7.onnx', verbose=False,opset_version=11)