YOLOv5图像分割--SegmentationModel类代码详解(上)

简介: 笔记

15.png

SegmentationModel类


定义model将会调用models/yolo.py中的类SegmentationModel。该类是继承父类--DetectionModel类。


class SegmentationModel(DetectionModel):  # SegmentationModel这个类是继承了DetectionModel这个类
    # YOLOv5 segmentation model
    def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
        super().__init__(cfg, ch, nc, anchors)

DetectionModel类


因此直接去看下DetectionModel这个类代码,同时也能发现这个类又是继承BaseModel这个类。这里先看一下DetectionModel,后面再看BaseModel这个类。这个类的功能可以根据yaml文件定义网络【定义网络的函数为parse_model()】,在分割任务中,anchors为None。


class DetectionModel(BaseModel):  # 继承BaseModel这个类
    # YOLOv5 detection model
    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
        super().__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg, encoding='ascii', errors='ignore') as f:
                self.yaml = yaml.safe_load(f)  # model dict
        # Define model
        ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
        if nc and nc != self.yaml['nc']:
            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override yaml value
        if anchors:
            LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
            self.yaml['anchors'] = round(anchors)  # override yaml value
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist


得到的model如下,这里需要注意的是此时的self指SegmentationModel类。

Sequential(

 (0): Conv(

   (conv): Conv2d(3, 32, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2), bias=False)

   (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

   (act): SiLU()

 )

 (1): Conv(

   (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

   (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

   (act): SiLU()

 )

 (2): C3(

   (cv1): Conv(

     (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv2): Conv(

     (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv3): Conv(

     (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (m): Sequential(

     (0): Bottleneck(

       (cv1): Conv(

         (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)

         (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

       (cv2): Conv(

         (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

         (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

     )

   )

 )

 (3): Conv(

   (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

   (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

   (act): SiLU()

 )

 (4): C3(

   (cv1): Conv(

     (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv2): Conv(

     (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv3): Conv(

     (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (m): Sequential(

     (0): Bottleneck(

       (cv1): Conv(

         (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)

         (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

       (cv2): Conv(

         (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

         (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

     )

     (1): Bottleneck(

       (cv1): Conv(

         (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)

         (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

       (cv2): Conv(

         (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

         (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

     )

   )

 )

 (5): Conv(

   (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

   (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

   (act): SiLU()

 )

 (6): C3(

   (cv1): Conv(

     (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv2): Conv(

     (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv3): Conv(

     (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (m): Sequential(

     (0): Bottleneck(

       (cv1): Conv(

         (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

         (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

       (cv2): Conv(

         (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

         (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

     )

     (1): Bottleneck(

       (cv1): Conv(

         (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

         (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

       (cv2): Conv(

         (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

         (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

     )

     (2): Bottleneck(

       (cv1): Conv(

         (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

         (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

       (cv2): Conv(

         (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

         (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

     )

   )

 )

 (7): Conv(

   (conv): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

   (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

   (act): SiLU()

 )

 (8): C3(

   (cv1): Conv(

     (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv2): Conv(

     (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv3): Conv(

     (conv): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (m): Sequential(

     (0): Bottleneck(

       (cv1): Conv(

         (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)

         (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

       (cv2): Conv(

         (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

         (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

     )

   )

 )

 (9): SPPF(

   (cv1): Conv(

     (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv2): Conv(

     (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (m): MaxPool2d(kernel_size=5, stride=1, padding=2, dilation=1, ceil_mode=False)

 )

 (10): Conv(

   (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)

   (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

   (act): SiLU()

 )

 (11): Upsample(scale_factor=2.0, mode=nearest)

 (12): Concat()

 (13): C3(

   (cv1): Conv(

     (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv2): Conv(

     (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv3): Conv(

     (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (m): Sequential(

     (0): Bottleneck(

       (cv1): Conv(

         (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

         (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

       (cv2): Conv(

         (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

         (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

     )

   )

 )

 (14): Conv(

   (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

   (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

   (act): SiLU()

 )

 (15): Upsample(scale_factor=2.0, mode=nearest)

 (16): Concat()

 (17): C3(

   (cv1): Conv(

     (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv2): Conv(

     (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv3): Conv(

     (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (m): Sequential(

     (0): Bottleneck(

       (cv1): Conv(

         (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)

         (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

       (cv2): Conv(

         (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

         (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

     )

   )

 )

 (18): Conv(

   (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

   (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

   (act): SiLU()

 )

 (19): Concat()

 (20): C3(

   (cv1): Conv(

     (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv2): Conv(

     (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv3): Conv(

     (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (m): Sequential(

     (0): Bottleneck(

       (cv1): Conv(

         (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)

         (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

       (cv2): Conv(

         (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

         (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

     )

   )

 )

 (21): Conv(

   (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

   (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

   (act): SiLU()

 )

 (22): Concat()

 (23): C3(

   (cv1): Conv(

     (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv2): Conv(

     (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (cv3): Conv(

     (conv): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)

     (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

     (act): SiLU()

   )

   (m): Sequential(

     (0): Bottleneck(

       (cv1): Conv(

         (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)

         (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

       (cv2): Conv(

         (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

         (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

         (act): SiLU()

       )

     )

   )

 )

 (24): Segment(

   (m): ModuleList(

     (0): Conv2d(128, 351, kernel_size=(1, 1), stride=(1, 1))

     (1): Conv2d(256, 351, kernel_size=(1, 1), stride=(1, 1))

     (2): Conv2d(512, 351, kernel_size=(1, 1), stride=(1, 1))

   )

   (proto): Proto(

     (cv1): Conv(

       (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

       (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

       (act): SiLU()

     )

     (upsample): Upsample(scale_factor=2.0, mode=nearest)

     (cv2): Conv(

       (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

       (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

       (act): SiLU()

     )

     (cv3): Conv(

       (conv): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)

       (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

       (act): SiLU()

     )

   )

 )

)


然后继续看下面的代码,m=self.model[-1]是获取上面定义model的最后一个模块即Segment类【这个类又继承Detect类,这个】,所以此时的m类型为Segment类。然后看forward 的lambda表达式那行, 由于通过isinstance判断m为Segment为True,所以此时调用SegmentationModel类的forward函数,并且可以回看前面SegmentationModel这个类发现没有重新父类DetectionModel的forward函数,所以这里直接调用父类的forward即可。  

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, (Detect, Segment)):
            s = 256  # 2x min stride
            m.inplace = self.inplace
            forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)


目录
相关文章
|
6月前
|
机器学习/深度学习 开发工具 计算机视觉
YOLOv8 目标检测 | 自定义数据集
YOLOv8 目标检测 | 自定义数据集
|
机器学习/深度学习 编解码 算法
yolo原理系列——yolov1--yolov5详细解释
yolo原理系列——yolov1--yolov5详细解释
1224 0
yolo原理系列——yolov1--yolov5详细解释
|
1月前
|
机器学习/深度学习 JSON 数据可视化
YOLO11-pose关键点检测:训练实战篇 | 自己数据集从labelme标注到生成yolo格式的关键点数据以及训练教程
本文介绍了如何将个人数据集转换为YOLO11-pose所需的数据格式,并详细讲解了手部关键点检测的训练过程。内容涵盖数据集标注、格式转换、配置文件修改及训练参数设置,最终展示了训练结果和预测效果。适用于需要进行关键点检测的研究人员和开发者。
189 0
|
6月前
|
机器学习/深度学习 算法 计算机视觉
[YOLOv8/YOLOv7/YOLOv5系列算法改进NO.5]改进特征融合网络PANET为BIFPN(更新添加小目标检测层yaml)
本文介绍了改进YOLOv5以解决处理复杂背景时可能出现的错漏检问题。
264 5
|
5月前
|
存储 API 计算机视觉
实战|YOLOv10 自定义目标检测
实战|YOLOv10 自定义目标检测
248 1
|
5月前
|
机器学习/深度学习 分布式计算 并行计算
基于YOLO和Darknet预训练模型的对象检测
【6月更文挑战第6天】基于YOLO和Darknet预训练模型的对象检测。
52 2
|
5月前
|
计算机视觉
实战|基于YOLOv10与MobileSAM实现目标检测与分割【附完整源码】
实战|基于YOLOv10与MobileSAM实现目标检测与分割【附完整源码】
|
6月前
|
存储 自动驾驶 安全
基于 YOLOv8 的图像分割 demo
基于 YOLOv8 的图像分割 demo
|
6月前
|
机器学习/深度学习 自动驾驶 安全
使用YOLO检测图像中的对象
使用YOLO检测图像中的对象
|
存储 机器学习/深度学习 数据采集
使用 YOLO V2深度学习进行多类对象检测
使用 YOLO V2深度学习进行多类对象检测。
141 1