输出结果
VOC_LABELS = {
'none': (0, 'Background'),
'aeroplane': (1, 'Vehicle'),
'bicycle': (2, 'Vehicle'),
'bird': (3, 'Animal'),
'boat': (4, 'Vehicle'),
'bottle': (5, 'Indoor'),
'bus': (6, 'Vehicle'),
'car': (7, 'Vehicle'),
'cat': (8, 'Animal'),
'chair': (9, 'Indoor'),
'cow': (10, 'Animal'),
'diningtable': (11, 'Indoor'),
'dog': (12, 'Animal'),
'horse': (13, 'Animal'),
'motorbike': (14, 'Vehicle'),
'person': (15, 'Person'),
'pottedplant': (16, 'Indoor'),
'sheep': (17, 'Animal'),
'sofa': (18, 'Indoor'),
'train': (19, 'Vehicle'),
'tvmonitor': (20, 'Indoor'),
}
SSD代码
class SSDNet(object):
"""Implementation of the SSD VGG-based 300 network.
The default features layers with 300x300 image input are:
conv4 ==> 38 x 38
conv7 ==> 19 x 19
conv8 ==> 10 x 10
conv9 ==> 5 x 5
conv10 ==> 3 x 3
conv11 ==> 1 x 1
The default image size used to train this network is 300x300.
"""
default_params = SSDParams(
img_shape=(300, 300),
num_classes=21,
no_annotation_label=21,
feat_layers=['block4', 'block7', 'block8', 'block9', 'block10', 'block11'],
feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
anchor_size_bounds=[0.15, 0.90],
# anchor_size_bounds=[0.20, 0.90],
anchor_sizes=[(21., 45.),
(45., 99.),
(99., 153.),
(153., 207.),
(207., 261.),
(261., 315.)],
# anchor_sizes=[(30., 60.),
# (60., 111.),
# (111., 162.),
# (162., 213.),
# (213., 264.),
# (264., 315.)],
anchor_ratios=[[2, .5],
[2, .5, 3, 1./3],
[2, .5, 3, 1./3],
[2, .5, 3, 1./3],
[2, .5],
[2, .5]],
anchor_steps=[8, 16, 32, 64, 100, 300],
anchor_offset=0.5,
normalizations=[20, -1, -1, -1, -1, -1],
prior_scaling=[0.1, 0.1, 0.2, 0.2]
)
def __init__(self, params=None):
"""Init the SSD net with some parameters. Use the default ones
if none provided.
"""
if isinstance(params, SSDParams):
self.params = params
else:
self.params = SSDNet.default_params
# ======================================================================= #
def net(self, inputs,
is_training=True,
update_feat_shapes=True,
dropout_keep_prob=0.5,
prediction_fn=slim.softmax,
reuse=None,
scope='ssd_300_vgg'):
"""SSD network definition.
"""
r = ssd_net(inputs,
num_classes=self.params.num_classes,
feat_layers=self.params.feat_layers,
anchor_sizes=self.params.anchor_sizes,
anchor_ratios=self.params.anchor_ratios,
normalizations=self.params.normalizations,
is_training=is_training,
dropout_keep_prob=dropout_keep_prob,
prediction_fn=prediction_fn,
reuse=reuse,
scope=scope)
# Update feature shapes (try at least!)
if update_feat_shapes:
shapes = ssd_feat_shapes_from_net(r[0], self.params.feat_shapes)
self.params = self.params._replace(feat_shapes=shapes)
return r
def arg_scope(self, weight_decay=0.0005, data_format='NHWC'):
"""Network arg_scope.
"""
return ssd_arg_scope(weight_decay, data_format=data_format)
def arg_scope_caffe(self, caffe_scope):
"""Caffe arg_scope used for weights importing.
"""
return ssd_arg_scope_caffe(caffe_scope)
# ======================================================================= #
def update_feature_shapes(self, predictions):
"""Update feature shapes from predictions collection (Tensor or Numpy
array).
"""
shapes = ssd_feat_shapes_from_net(predictions, self.params.feat_shapes)
self.params = self.params._replace(feat_shapes=shapes)
def anchors(self, img_shape, dtype=np.float32):
"""Compute the default anchor boxes, given an image shape.
"""
return ssd_anchors_all_layers(img_shape,
self.params.feat_shapes,
self.params.anchor_sizes,
self.params.anchor_ratios,
self.params.anchor_steps,
self.params.anchor_offset,
dtype)
def bboxes_encode(self, labels, bboxes, anchors,
scope=None):
"""Encode labels and bounding boxes.
"""
return ssd_common.tf_ssd_bboxes_encode(
labels, bboxes, anchors,
self.params.num_classes,
self.params.no_annotation_label,
ignore_threshold=0.5,
prior_scaling=self.params.prior_scaling,
scope=scope)
def bboxes_decode(self, feat_localizations, anchors,
scope='ssd_bboxes_decode'):
"""Encode labels and bounding boxes.
"""
return ssd_common.tf_ssd_bboxes_decode(
feat_localizations, anchors,
prior_scaling=self.params.prior_scaling,
scope=scope)
def detected_bboxes(self, predictions, localisations,
select_threshold=None, nms_threshold=0.5,
clipping_bbox=None, top_k=400, keep_top_k=200):
"""Get the detected bounding boxes from the SSD network output.
"""
# Select top_k bboxes from predictions, and clip
rscores, rbboxes = \
ssd_common.tf_ssd_bboxes_select(predictions, localisations,
select_threshold=select_threshold,
num_classes=self.params.num_classes)
rscores, rbboxes = \
tfe.bboxes_sort(rscores, rbboxes, top_k=top_k)
# Apply NMS algorithm.
rscores, rbboxes = \
tfe.bboxes_nms_batch(rscores, rbboxes,
nms_threshold=nms_threshold,
keep_top_k=keep_top_k)
if clipping_bbox is not None:
rbboxes = tfe.bboxes_clip(clipping_bbox, rbboxes)
return rscores, rbboxes
def losses(self, logits, localisations,
gclasses, glocalisations, gscores,
match_threshold=0.5,
negative_ratio=3.,
alpha=1.,
label_smoothing=0.,
scope='ssd_losses'):
"""Define the SSD network losses.
"""
return ssd_losses(logits, localisations,
gclasses, glocalisations, gscores,
match_threshold=match_threshold,
negative_ratio=negative_ratio,
alpha=alpha,
label_smoothing=label_smoothing,
scope=scope)