1.5 RFB(Receptive Field Block)
RFB模块是在《ECCV2018:Receptive Field Block Net for Accurate and Fast Object Detection》一文中提出的,该文的出发点是模拟人类视觉的感受野从而加强网络的特征提取能力,在结构上RFB借鉴了Inception的思想,主要是在Inception的基础上加入了空洞卷积,从而有效增大了感受野
RFB和RFB-s的架构。RFB-s用于在浅层人类视网膜主题图中模拟较小的pRF,使用具有较小内核的更多分支。
class BasicConv(nn.Module): def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True): super(BasicConv, self).__init__() self.out_channels = out_planes if bn: self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False) self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) self.relu = nn.ReLU(inplace=True) if relu else None else: self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True) self.bn = None self.relu = nn.ReLU(inplace=True) if relu else None def forward(self, x): x = self.conv(x) if self.bn is not None: x = self.bn(x) if self.relu is not None: x = self.relu(x) return x class BasicRFB(nn.Module): def __init__(self, in_planes, out_planes, stride=1, scale=0.1, map_reduce=8, vision=1, groups=1): super(BasicRFB, self).__init__() self.scale = scale self.out_channels = out_planes inter_planes = in_planes // map_reduce self.branch0 = nn.Sequential( BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups), BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 1, dilation=vision + 1, relu=False, groups=groups) ) self.branch1 = nn.Sequential( BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups), BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 2, dilation=vision + 2, relu=False, groups=groups) ) self.branch2 = nn.Sequential( BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), BasicConv(inter_planes, (inter_planes // 2) * 3, kernel_size=3, stride=1, padding=1, groups=groups), BasicConv((inter_planes // 2) * 3, 2 * inter_planes, kernel_size=3, stride=stride, padding=1, groups=groups), BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 4, dilation=vision + 4, relu=False, groups=groups) ) self.ConvLinear = BasicConv(6 * inter_planes, out_planes, kernel_size=1, stride=1, relu=False) self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) self.relu = nn.ReLU(inplace=False) def forward(self, x): x0 = self.branch0(x) x1 = self.branch1(x) x2 = self.branch2(x) out = torch.cat((x0, x1, x2), 1) out = self.ConvLinear(out) short = self.shortcut(x) out = out * self.scale + short out = self.relu(out) return out
1.6 SPPCSPC
该模块是YOLOv7中使用的SPP结构,表现优于SPPF,但参数量和计算量提升了很多
class SPPCSPC(nn.Module): # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)): super(SPPCSPC, self).__init__() c_ = int(2 * c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c1, c_, 1, 1) self.cv3 = Conv(c_, c_, 3, 1) self.cv4 = Conv(c_, c_, 1, 1) self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) self.cv5 = Conv(4 * c_, c_, 1, 1) self.cv6 = Conv(c_, c_, 3, 1) self.cv7 = Conv(2 * c_, c2, 1, 1) def forward(self, x): x1 = self.cv4(self.cv3(self.cv1(x))) y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1))) y2 = self.cv2(x) return self.cv7(torch.cat((y1, y2), dim=1))
#分组SPPCSPC 分组后参数量和计算量与原本差距不大,不知道效果怎么样 class SPPCSPC_group(nn.Module): def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)): super(SPPCSPC_group, self).__init__() c_ = int(2 * c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1, g=4) self.cv2 = Conv(c1, c_, 1, 1, g=4) self.cv3 = Conv(c_, c_, 3, 1, g=4) self.cv4 = Conv(c_, c_, 1, 1, g=4) self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) self.cv5 = Conv(4 * c_, c_, 1, 1, g=4) self.cv6 = Conv(c_, c_, 3, 1, g=4) self.cv7 = Conv(2 * c_, c2, 1, 1, g=4) def forward(self, x): x1 = self.cv4(self.cv3(self.cv1(x))) y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1))) y2 = self.cv2(x) return self.cv7(torch.cat((y1, y2), dim=1))
1.7 SPPFCSPC🍀
我借鉴了SPPF的思想将SPPCSPC优化了一下,得到了SPPFCSPC,在保持感受野不变的情况下获得速度提升;我把这个模块给v7作者看了,并没有得到否定,详细回答可以看4 Issue
A:
Max pooling uses very few computation, if you programming well, above one could run three max pool layers in parallel, while below one must process three max pool layers sequentially.
By the way, you could replace SPPCSPC by SPPFCSPC at inference time if your hardware is friendly to SPPFCSPC.
有问题欢迎大家指正,如果感觉有帮助的话请点赞支持下👍📖🌟
参考文献:增强感受野SPP、ASPP、RFB、PPM