2. resnet.py
Defines the resnet_maskrcnn model built on a ResNet backbone; resnet50, resnet101 or resnet152 can be selected as the backbone in the code.
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import warnings
import keras
import keras_resnet
import keras_resnet.models
import keras_retinanet.models.resnet
from ..models import retinanet, Backbone
class ResNetBackbone(Backbone, keras_retinanet.models.resnet.ResNetBackbone):
    """ Backbone wrapper that extends the keras-retinanet ResNet backbone with
    Mask R-CNN model construction.
    """

    def maskrcnn(self, *args, **kwargs):
        """ Build a Mask R-CNN model using this instance's ResNet variant
        (`self.backbone`) as the feature extractor.
        """
        return resnet_maskrcnn(*args, backbone=self.backbone, **kwargs)
def resnet_maskrcnn(num_classes, backbone='resnet50', inputs=None, modifier=None, mask_dtype=keras.backend.floatx(), **kwargs):
    """ Construct a Mask R-CNN model on top of a ResNet backbone.

    Args
        num_classes : Number of object classes to predict.
        backbone    : Which ResNet variant to use ('resnet50', 'resnet101' or 'resnet152').
        inputs      : Optional keras input tensor; defaults to a (None, None, 3) image input.
        modifier    : Optional function applied to the backbone model before building the
                      full model (e.g. to freeze layers).
        mask_dtype  : Dtype used for the mask head (forwarded to retinanet_mask).

    Returns
        A keras model produced by retinanet.retinanet_mask.

    Raises
        ValueError : If `backbone` is not one of the supported ResNet variants.
    """
    # choose default input
    if inputs is None:
        inputs = keras.layers.Input(shape=(None, None, 3), name='image')

    # create the resnet backbone
    if backbone == 'resnet50':
        resnet = keras_resnet.models.ResNet50(inputs, include_top=False, freeze_bn=True)
    elif backbone == 'resnet101':
        resnet = keras_resnet.models.ResNet101(inputs, include_top=False, freeze_bn=True)
    elif backbone == 'resnet152':
        resnet = keras_resnet.models.ResNet152(inputs, include_top=False, freeze_bn=True)
    else:
        # fail fast with a clear message instead of hitting an UnboundLocalError
        # on `resnet` further down
        raise ValueError('Backbone (\'{}\') is invalid, must be one of: resnet50, resnet101, resnet152.'.format(backbone))

    # invoke modifier if given
    if modifier:
        resnet = modifier(resnet)

    # create the full model; outputs[1:] skips the first backbone output
    # (presumably the coarsest/stem stage — confirm against keras_resnet)
    model = retinanet.retinanet_mask(inputs=inputs, num_classes=num_classes, backbone_layers=resnet.outputs[1:], mask_dtype=mask_dtype, **kwargs)

    return model
def resnet50_maskrcnn(num_classes, inputs=None, **kwargs):
    """ Convenience constructor for a Mask R-CNN model with a ResNet-50 backbone. """
    return resnet_maskrcnn(backbone='resnet50', num_classes=num_classes, inputs=inputs, **kwargs)
def resnet101_maskrcnn(num_classes, inputs=None, **kwargs):
    """ Convenience constructor for a Mask R-CNN model with a ResNet-101 backbone. """
    return resnet_maskrcnn(backbone='resnet101', num_classes=num_classes, inputs=inputs, **kwargs)
def resnet152_maskrcnn(num_classes, inputs=None, **kwargs):
    """ Convenience constructor for a Mask R-CNN model with a ResNet-152 backbone. """
    return resnet_maskrcnn(backbone='resnet152', num_classes=num_classes, inputs=inputs, **kwargs)
3. roi.py
Contains the implementation of the RoiAlign layer, including helper computations such as map_to_level.
import keras.backend
import keras.layers
import keras_retinanet.backend
from .. import backend
class RoiAlign(keras.layers.Layer):
    """ Keras layer that extracts a fixed-size feature crop for each box from a
    feature pyramid (FPN), picking the pyramid level per box by box size.

    `call` takes [image_shape, boxes, scores, P0, P1, ...] and returns a tensor of
    shape (1, num_boxes, crop_size[0], crop_size[1], channels).
    """

    def __init__(self, crop_size=(14, 14), **kwargs):
        """ Args
            crop_size : (height, width) of the feature crop extracted per box.
        """
        self.crop_size = crop_size

        super(RoiAlign, self).__init__(**kwargs)

    def map_to_level(self, boxes, canonical_size=224, canonical_level=1, min_level=0, max_level=4):
        """ Assign each box to a pyramid level index based on its area.

        Uses the FPN-style heuristic: floor(canonical_level + log2(sqrt(area) /
        canonical_size)), clipped to [min_level, max_level].

        Args
            boxes : (num_boxes, 4) tensor of (x1, y1, x2, y2) boxes in image coordinates.

        Returns
            (num_boxes,) tensor of clipped level indices.
        """
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]

        w = x2 - x1
        h = y2 - y1

        # sqrt(area) as the characteristic box size
        size = keras.backend.sqrt(w * h)

        # epsilon guards log2(0) for degenerate (zero-area) boxes
        levels = backend.floor(canonical_level + backend.log2(size / canonical_size + keras.backend.epsilon()))
        levels = keras.backend.clip(levels, min_level, max_level)

        return levels

    def call(self, inputs, **kwargs):
        """ Crop and resize features for each box from the pyramid level it maps to.

        Args
            inputs : [image_shape, boxes, scores, *fpn] where
                image_shape : image shape tensor (indexed as [1]=height, [2]=width below
                              — assumes (batch, H, W, C) layout; TODO confirm),
                boxes       : (batch, num_boxes, 4) boxes in (x1, y1, x2, y2) image coordinates,
                scores      : per-box scores,
                fpn         : list of pyramid feature maps, one per level.
        """
        # TODO: Support batch_size > 1
        # Only element [0] of each batched input is used below.
        image_shape = keras.backend.cast(inputs[0], keras.backend.floatx())
        boxes = keras.backend.stop_gradient(inputs[1][0])
        # NOTE(review): `scores` is extracted but never used in this method —
        # presumably kept for input-signature symmetry; confirm before removing.
        scores = keras.backend.stop_gradient(inputs[2][0])
        fpn = [keras.backend.stop_gradient(i[0]) for i in inputs[3:]]

        # compute from which level to get features from
        target_levels = self.map_to_level(boxes)

        # process each pyramid level independently
        rois = []
        ordered_indices = []
        for i in range(len(fpn)):
            # select the boxes assigned to this pyramid level
            indices = keras_retinanet.backend.where(keras.backend.equal(target_levels, i))
            ordered_indices.append(indices)

            level_boxes = keras_retinanet.backend.gather_nd(boxes, indices)
            fpn_shape = keras.backend.cast(keras.backend.shape(fpn[i]), dtype=keras.backend.floatx())

            # convert to the format expected by crop_and_resize:
            # [y1, x1, y2, x2] normalized to [0, 1] relative to this level's feature map
            x1 = level_boxes[:, 0]
            y1 = level_boxes[:, 1]
            x2 = level_boxes[:, 2]
            y2 = level_boxes[:, 3]
            level_boxes = keras.backend.stack([
                (y1 / image_shape[1] * fpn_shape[0]) / (fpn_shape[0] - 1),
                (x1 / image_shape[2] * fpn_shape[1]) / (fpn_shape[1] - 1),
                (y2 / image_shape[1] * fpn_shape[0] - 1) / (fpn_shape[0] - 1),
                (x2 / image_shape[2] * fpn_shape[1] - 1) / (fpn_shape[1] - 1),
            ], axis=1)

            # crop the features for this level's boxes; all crops come from the single
            # feature map re-expanded to batch size 1, hence the zeros box_indices
            rois.append(backend.crop_and_resize(
                keras.backend.expand_dims(fpn[i], axis=0),
                level_boxes,
                keras.backend.zeros((keras.backend.shape(level_boxes)[0],), dtype='int32'),
                self.crop_size
            ))

        # concatenate rois to one blob
        rois = keras.backend.concatenate(rois, axis=0)

        # reorder rois back to the original box order using the per-level indices
        indices = keras.backend.concatenate(ordered_indices, axis=0)
        rois = keras_retinanet.backend.scatter_nd(indices, rois, keras.backend.cast(keras.backend.shape(rois), 'int64'))

        # re-add the (size-1) batch dimension
        return keras.backend.expand_dims(rois, axis=0)

    def compute_output_shape(self, input_shape):
        # (batch, num_rois, crop_h, crop_w, channels); num_rois is dynamic (None)
        return (input_shape[1][0], None, self.crop_size[0], self.crop_size[1], input_shape[3][-1])

    def get_config(self):
        # serialize crop_size so the layer can be reconstructed from a saved model
        config = super(RoiAlign, self).get_config()
        config.update({
            'crop_size' : self.crop_size,
        })

        return config
4. losses.py
Computes the Mask R-CNN loss. Excerpt of the core mask-loss computation (the full definition appears below):
    # compute mask loss
    mask_loss = keras.backend.binary_crossentropy(masks_target, masks)
    normalizer = keras.backend.shape(masks)[0] * keras.backend.shape(masks)[1] * keras.backend.shape(masks)[2]
    normalizer = keras.backend.maximum(keras.backend.cast(normalizer, keras.backend.floatx()), 1)
    mask_loss = keras.backend.sum(mask_loss) / normalizer
import keras.backend
import keras_retinanet.backend
from . import backend
def mask(iou_threshold=0.5, mask_size=(28, 28)):
    """ Construct the Mask R-CNN mask loss function.

    Args
        iou_threshold : Minimum IoU between a predicted box and an annotation for
                        that box's mask to contribute to the loss.
        mask_size     : (height, width) of the predicted masks.

    Returns
        A keras-compatible loss function `f(y_true, y_pred)`.
    """
    def _mask_conditional(y_true, y_pred):
        # if there are no masks annotations, return 0; else, compute the masks loss
        loss = backend.cond(
            keras.backend.any(keras.backend.equal(keras.backend.shape(y_true), 0)),
            lambda: keras.backend.cast_to_floatx(0.0),
            lambda: _mask(y_true, y_pred, iou_threshold=iou_threshold, mask_size=mask_size)
        )
        return loss

    def _mask(y_true, y_pred, iou_threshold=0.5, mask_size=(28, 28)):
        """ Compute binary cross-entropy between target masks and the predicted
        masks of boxes that sufficiently overlap an annotation.
        """
        # split up the different predicted blobs:
        # y_pred per item is [x1, y1, x2, y2, flattened per-class masks...]
        boxes = y_pred[:, :, :4]
        masks = y_pred[:, :, 4:]

        # split up the different blobs:
        # y_true per item is [x1, y1, x2, y2, label, width, height, flattened mask...]
        annotations  = y_true[:, :, :5]
        width  = keras.backend.cast(y_true[0, 0, 5], dtype='int32')
        height = keras.backend.cast(y_true[0, 0, 6], dtype='int32')
        masks_target = y_true[:, :, 7:]

        # reshape the masks back to their original size
        masks_target = keras.backend.reshape(masks_target, (keras.backend.shape(masks_target)[0], keras.backend.shape(masks_target)[1], height, width))
        masks = keras.backend.reshape(masks, (keras.backend.shape(masks)[0], keras.backend.shape(masks)[1], mask_size[0], mask_size[1], -1))

        # TODO: Fix batch_size > 1
        # Only the first batch element is used from here on.
        boxes = boxes[0]
        masks = masks[0]
        annotations = annotations[0]
        masks_target = masks_target[0]

        # compute overlap of boxes with annotations and, per box,
        # the best-matching annotation and its IoU
        iou = backend.overlap(boxes, annotations)
        argmax_overlaps_inds = keras.backend.argmax(iou, axis=1)
        max_iou = keras.backend.max(iou, axis=1)

        # keep only boxes whose best IoU is >= iou_threshold
        indices = keras_retinanet.backend.where(keras.backend.greater_equal(max_iou, iou_threshold))
        boxes = keras_retinanet.backend.gather_nd(boxes, indices)
        masks = keras_retinanet.backend.gather_nd(masks, indices)
        argmax_overlaps_inds = keras.backend.cast(keras_retinanet.backend.gather_nd(argmax_overlaps_inds, indices), 'int32')
        labels = keras.backend.cast(keras.backend.gather(annotations[:, 4], argmax_overlaps_inds), 'int32')

        # make normalized boxes in [y1, x1, y2, x2] order, as expected by crop_and_resize
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        boxes = keras.backend.stack([
            y1 / (keras.backend.cast(height, dtype=keras.backend.floatx()) - 1),
            x1 / (keras.backend.cast(width, dtype=keras.backend.floatx()) - 1),
            (y2 - 1) / (keras.backend.cast(height, dtype=keras.backend.floatx()) - 1),
            (x2 - 1) / (keras.backend.cast(width, dtype=keras.backend.floatx()) - 1),
        ], axis=1)

        # crop and resize each box's region of the target mask down to mask_size
        masks_target = keras.backend.expand_dims(masks_target, axis=3)  # append a fake channel dimension
        masks_target = backend.crop_and_resize(
            masks_target,
            boxes,
            argmax_overlaps_inds,
            mask_size
        )
        masks_target = masks_target[:, :, :, 0]  # remove fake channel dimension

        # gather, per box, the predicted mask for the matched annotation's label
        masks = backend.transpose(masks, (0, 3, 1, 2))
        label_indices = keras.backend.stack([
            keras.backend.arange(keras.backend.shape(labels)[0]),
            labels
        ], axis=1)
        masks = keras_retinanet.backend.gather_nd(masks, label_indices)

        # compute mask loss: mean binary cross-entropy over all mask pixels;
        # normalizer is clamped to >= 1 to avoid dividing by zero when no box qualifies
        mask_loss = keras.backend.binary_crossentropy(masks_target, masks)
        normalizer = keras.backend.shape(masks)[0] * keras.backend.shape(masks)[1] * keras.backend.shape(masks)[2]
        normalizer = keras.backend.maximum(keras.backend.cast(normalizer, keras.backend.floatx()), 1)
        mask_loss = keras.backend.sum(mask_loss) / normalizer

        return mask_loss

    return _mask_conditional