DL之RetinaNet:基于RetinaNet算法(keras框架)利用resnet50_coco预训练权重(.h5文件)实现目标检测

简介: DL之RetinaNet:基于RetinaNet算法(keras框架)利用resnet50_coco预训练权重(.h5文件)实现目标检测

输出结果

image.png


image.png

image.png

设计思路

更新中


核心代码

def __create_pyramid_features(C3, C4, C5, feature_size=256):
    """ Build the FPN feature pyramid on top of the backbone features.

    Takes the three backbone stages [C3, C4, C5] and produces the five
    pyramid levels [P3, P4, P5, P6, P7].
    Reference write-up: https://yunyaniu.blog.csdn.net/article/details/100010853

    Args
        C3           : Feature stage C3 from the backbone.
        C4           : Feature stage C4 from the backbone.
        C5           : Feature stage C5 from the backbone.
        feature_size : Filter count passed to every convolution in the pyramid.

    Returns
        A list of feature levels [P3, P4, P5, P6, P7].
    """
    def conv(kernel, stride, layer_name):
        # All pyramid convolutions share the filter count and 'same' padding;
        # only kernel size, stride and layer name vary.
        return keras.layers.Conv2D(
            feature_size,
            kernel_size=kernel,
            strides=stride,
            padding='same',
            name=layer_name,
        )

    # P5: 1x1 reduction of C5. The reduced map is also passed through
    # UpsampleLike together with C4 (presumably resizing it to C4's spatial
    # size -- confirm against layers.UpsampleLike) before the 3x3 output conv.
    c5_lateral = conv(1, 1, 'C5_reduced')(C5)
    c5_topdown = layers.UpsampleLike(name='P5_upsampled')([c5_lateral, C4])
    level5 = conv(3, 1, 'P5')(c5_lateral)

    # P4: 1x1 reduction of C4, added elementwise to the upsampled P5 signal.
    c4_lateral = conv(1, 1, 'C4_reduced')(C4)
    c4_merged = keras.layers.Add(name='P4_merged')([c5_topdown, c4_lateral])
    c4_topdown = layers.UpsampleLike(name='P4_upsampled')([c4_merged, C3])
    level4 = conv(3, 1, 'P4')(c4_merged)

    # P3: 1x1 reduction of C3, added elementwise to the upsampled P4 signal.
    c3_lateral = conv(1, 1, 'C3_reduced')(C3)
    c3_merged = keras.layers.Add(name='P3_merged')([c4_topdown, c3_lateral])
    level3 = conv(3, 1, 'P3')(c3_merged)

    # "P6 is obtained via a 3x3 stride-2 conv on C5"
    level6 = conv(3, 2, 'P6')(C5)

    # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
    level6_relu = keras.layers.Activation('relu', name='C6_relu')(level6)
    level7 = conv(3, 2, 'P7')(level6_relu)

    return [level3, level4, level5, level6, level7]

def default_submodels(num_classes, num_anchors):
    """ Create the list of default submodels used for object detection.

    Two submodels are built: a box-regression submodel
    (default_regression_model) and a classification submodel
    (default_classification_model).

    Args
        num_classes : Number of classes to use.
        num_anchors : Number of base anchors.

    Returns
        A list of (name, submodel) tuples: 'regression' first, then 'classification'.
    """
    # 4 is the first argument of the regression head (presumably the number
    # of regression values per anchor -- confirm against default_regression_model).
    regression_head = default_regression_model(4, num_anchors)
    classification_head = default_classification_model(num_classes, num_anchors)

    return [
        ('regression', regression_head),
        ('classification', classification_head),
    ]

def __build_model_pyramid(name, model, features):
    """ Apply a single submodel to every FPN level and join the results.

    This is the step that actually assembles the pyramid model for one submodel.

    Args
        name     : Name of the submodel; used as the name of the concatenation layer.
        model    : The submodel to evaluate.
        features : The FPN features.

    Returns
        A tensor containing the submodel responses for all FPN features,
        concatenated along axis 1.
    """
    # The merge layer is created first, then the submodel is applied to each
    # level in the order given by `features`.
    merge = keras.layers.Concatenate(axis=1, name=name)
    per_level_outputs = [model(level) for level in features]
    return merge(per_level_outputs)

"""

The default anchor parameters. Combining the ratios and scales gives 9 anchors.

"""

# Default anchor configuration: five sizes and five strides (presumably one
# pair per pyramid level P3-P7 -- confirm against AnchorParameters), plus
# three aspect ratios and three scales.
AnchorParameters.default = AnchorParameters(
    sizes=[32, 64, 128, 256, 512],
    strides=[8, 16, 32, 64, 128],
    ratios=np.array([0.5, 1, 2], dtype=keras.backend.floatx()),
    scales=np.array(
        [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)],
        dtype=keras.backend.floatx(),
    ),
)

def anchor_targets_bbox(

   anchors,

   image_group,

   annotations_group,

   num_classes,

   # negative_overlap and positive_overlap: anchors are told apart based on IoU

   negative_overlap=0.4,  

   positive_overlap=0.5

):
# NOTE(review): only the signature is shown in this excerpt; the function body is not included here.

def focal(alpha=0.25, gamma=2.0):
    """ Build a focal loss function for the classification output.

    Args
        alpha: Balancing factor; entries whose label equals 1 are weighted by alpha, all others by 1 - alpha.
        gamma: Exponent applied to the focal weight.

    Returns
        A function (y_true, y_pred) -> focal loss that uses the given alpha and gamma.
    """

    def _focal(y_true, y_pred):
        """ Compute the focal loss of y_pred w.r.t. y_true.

        As defined in https://arxiv.org/abs/1708.02002

        Args
            y_true: Target tensor from the generator, indexed as (B, N, channels). The last
                    channel is the anchor state (-1 for ignore, 0 for background, 1 for object);
                    the channels before it are the class labels.
            y_pred: Tensor of predicted data from the network with shape (B, N, num_classes).

        Returns
            The summed focal loss divided by the number of positive anchors (at least 1).
        """
        # split the target into class labels and the trailing anchor-state channel
        anchor_state = y_true[:, :, -1]
        targets = y_true[:, :, :-1]

        # drop anchors marked "ignore" (state -1) from both targets and predictions
        kept = backend.where(keras.backend.not_equal(anchor_state, -1))
        targets = backend.gather_nd(targets, kept)
        scores = backend.gather_nd(y_pred, kept)

        is_positive = keras.backend.equal(targets, 1)

        # alpha where the label is 1, (1 - alpha) elsewhere
        alpha_weight = keras.backend.ones_like(targets) * alpha
        alpha_weight = backend.where(is_positive, alpha_weight, 1 - alpha_weight)

        # (1 - p) where the label is 1, p elsewhere, raised to gamma
        modulator = backend.where(is_positive, 1 - scores, scores)
        weight = alpha_weight * (modulator ** gamma)

        # classification loss: focal weight times the plain binary cross-entropy
        weighted_loss = weight * keras.backend.binary_crossentropy(targets, scores)

        # normalise by the number of positive anchors, but never by less than 1
        positives = backend.where(keras.backend.equal(anchor_state, 1))
        num_positive = keras.backend.cast(keras.backend.shape(positives)[0], keras.backend.floatx())
        num_positive = keras.backend.maximum(keras.backend.cast_to_floatx(1.0), num_positive)

        return keras.backend.sum(weighted_loss) / num_positive

    return _focal

def smooth_l1(sigma=3.0):
    """ Build the smooth L1 loss function used as the box-regression loss.

    Args
        sigma: Defines the point where the loss changes from L2 to L1; the switch happens at |x| = 1 / sigma^2.

    Returns
        A function (y_true, y_pred) -> smooth L1 loss for the given sigma.
    """
    sigma_sq = sigma ** 2

    def _smooth_l1(y_true, y_pred):
        """ Compute the smooth L1 loss of y_pred w.r.t. y_true.

        Args
            y_true: Tensor from the generator of shape (B, N, 5). The last value for each box is the state of the anchor (ignore, negative, positive).
            y_pred: Tensor from the network of shape (B, N, 4).

        Returns
            The summed smooth L1 loss divided by the number of selected anchors (at least 1).
        """
        # the last channel is the anchor state, everything before it is the regression target
        target = y_true[:, :, :-1]
        anchor_state = y_true[:, :, -1]

        # keep only the anchors whose state equals 1; all other anchors are dropped
        selected = backend.where(keras.backend.equal(anchor_state, 1))
        predicted = backend.gather_nd(y_pred, selected)
        target = backend.gather_nd(target, selected)

        # smooth L1 on the absolute error x:
        #   0.5 * (sigma * x)^2      if x < 1 / sigma^2
        #   x - 0.5 / sigma^2        otherwise
        abs_error = keras.backend.abs(predicted - target)
        in_quadratic_zone = keras.backend.less(abs_error, 1.0 / sigma_sq)
        quadratic = 0.5 * sigma_sq * keras.backend.pow(abs_error, 2)
        linear = abs_error - 0.5 / sigma_sq
        per_element = backend.where(in_quadratic_zone, quadratic, linear)

        # normalise by the number of selected anchors, but never by less than 1
        count = keras.backend.maximum(1, keras.backend.shape(selected)[0])
        count = keras.backend.cast(count, dtype=keras.backend.floatx())

        return keras.backend.sum(per_element) / count

    return _smooth_l1


相关文章
|
监控 算法 自动驾驶
RetinaNet算法1
8月更文挑战第6天
|
机器学习/深度学习 监控 算法
RetinaNet算法2
8月更文挑战第7天
|
机器学习/深度学习 数据采集 监控
算法金 | DL 骚操作扫盲,神经网络设计与选择、参数初始化与优化、学习率调整与正则化、Loss Function、Bad Gradient
**神经网络与AI学习概览** - 探讨神经网络设计,包括MLP、RNN、CNN,激活函数如ReLU,以及隐藏层设计,强调网络结构与任务匹配。 - 参数初始化与优化涉及Xavier/He初始化,权重和偏置初始化,优化算法如SGD、Adam,针对不同场景选择。 - 学习率调整与正则化,如动态学习率、L1/L2正则化、早停法和Dropout,以改善训练和泛化。
372 0
算法金 | DL 骚操作扫盲,神经网络设计与选择、参数初始化与优化、学习率调整与正则化、Loss Function、Bad Gradient
|
机器学习/深度学习 并行计算 算法
【计算机视觉+CNN】keras+ResNet残差网络实现图像识别分类实战(附源码和数据集 超详细)
【计算机视觉+CNN】keras+ResNet残差网络实现图像识别分类实战(附源码和数据集 超详细)
651 1
|
机器学习/深度学习 文字识别 算法
【Keras计算机视觉OCR】文字识别算法中DenseNet、LSTM、CTC、Attention的讲解(图文解释 超详细)
【Keras计算机视觉OCR】文字识别算法中DenseNet、LSTM、CTC、Attention的讲解(图文解释 超详细)
767 0
|
机器学习/深度学习 文字识别 算法
【Keras计算机视觉OCR文字识别】文字检测算法中CTPN、CRAFT的讲解(图文解释 超详细)
【Keras计算机视觉OCR文字识别】文字检测算法中CTPN、CRAFT的讲解(图文解释 超详细)
633 0
|
机器学习/深度学习 存储 数据可视化
ECCV 2022 | CMU提出首个快速知识蒸馏的视觉框架:ResNet50 80.1%精度,训练加速30%(2)
ECCV 2022 | CMU提出首个快速知识蒸馏的视觉框架:ResNet50 80.1%精度,训练加速30%
309 0
|
编解码 固态存储 算法
论文阅读笔记 | 目标检测算法——RetinaNet(focal loss、含与SSD,RCNN,YOLO的对比)
论文阅读笔记 | 目标检测算法——RetinaNet(focal loss、含与SSD,RCNN,YOLO的对比)
1902 0
论文阅读笔记 | 目标检测算法——RetinaNet(focal loss、含与SSD,RCNN,YOLO的对比)
|
机器学习/深度学习 算法 数据可视化
DL之LSTM:基于tensorflow框架利用LSTM算法对气温数据集训练并回归预测
DL之LSTM:基于tensorflow框架利用LSTM算法对气温数据集训练并回归预测
DL之LSTM:基于tensorflow框架利用LSTM算法对气温数据集训练并回归预测
|
固态存储 算法 TensorFlow
DL之SSD:基于tensorflow利用SSD算法实现目标检测(21类)
DL之SSD:基于tensorflow利用SSD算法实现目标检测(21类)
DL之SSD:基于tensorflow利用SSD算法实现目标检测(21类)

热门文章

最新文章