目录
输出结果
# Pascal VOC label table: class name -> (integer class id, coarse category).
# Ids are assigned from the position of each entry below, so the order of
# this sequence is significant ('none' is id 0, 'tvmonitor' is id 20).
VOC_LABELS = {
    name: (index, category)
    for index, (name, category) in enumerate((
        ('none', 'Background'),
        ('aeroplane', 'Vehicle'),
        ('bicycle', 'Vehicle'),
        ('bird', 'Animal'),
        ('boat', 'Vehicle'),
        ('bottle', 'Indoor'),
        ('bus', 'Vehicle'),
        ('car', 'Vehicle'),
        ('cat', 'Animal'),
        ('chair', 'Indoor'),
        ('cow', 'Animal'),
        ('diningtable', 'Indoor'),
        ('dog', 'Animal'),
        ('horse', 'Animal'),
        ('motorbike', 'Vehicle'),
        ('person', 'Person'),
        ('pottedplant', 'Indoor'),
        ('sheep', 'Animal'),
        ('sofa', 'Indoor'),
        ('train', 'Vehicle'),
        ('tvmonitor', 'Indoor'),
    ))
}
SSD代码
class SSDNet(object):
    """Implementation of the SSD VGG-based 300 network.

    The default features layers with 300x300 image input are:
      conv4 ==> 38 x 38
      conv7 ==> 19 x 19
      conv8 ==> 10 x 10
      conv9 ==> 5 x 5
      conv10 ==> 3 x 3
      conv11 ==> 1 x 1
    The default image size used to train this network is 300x300.

    The class is a thin facade: every method forwards to a module-level
    helper (``ssd_net``, ``ssd_anchors_all_layers``, ``ssd_losses``, ...)
    or to ``ssd_common`` / ``tfe``, filling in values from ``self.params``.
    """
    # Default hyper-parameters. All per-layer lists below have six entries,
    # one per name in `feat_layers`.
    default_params = SSDParams(
        img_shape=(300, 300),
        num_classes=21,
        no_annotation_label=21,
        feat_layers=['block4', 'block7', 'block8',
                     'block9', 'block10', 'block11'],
        feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
        anchor_size_bounds=[0.15, 0.90],
        # Alternative (disabled) setting:
        # anchor_size_bounds=[0.20, 0.90],
        anchor_sizes=[(21., 45.),
                      (45., 99.),
                      (99., 153.),
                      (153., 207.),
                      (207., 261.),
                      (261., 315.)],
        # Alternative (disabled) setting:
        # anchor_sizes=[(30., 60.),
        #               (60., 111.),
        #               (111., 162.),
        #               (162., 213.),
        #               (213., 264.),
        #               (264., 315.)],
        anchor_ratios=[[2, .5],
                       [2, .5, 3, 1./3],
                       [2, .5, 3, 1./3],
                       [2, .5, 3, 1./3],
                       [2, .5],
                       [2, .5]],
        anchor_steps=[8, 16, 32, 64, 100, 300],
        anchor_offset=0.5,
        normalizations=[20, -1, -1, -1, -1, -1],
        prior_scaling=[0.1, 0.1, 0.2, 0.2]
    )

    def __init__(self, params=None):
        """Init the SSD net with some parameters.

        Args:
          params: an `SSDParams` instance. Any other value (including the
            default `None`) is ignored and `SSDNet.default_params` is used
            instead; no error is raised for an unexpected type.
        """
        if isinstance(params, SSDParams):
            self.params = params
        else:
            self.params = SSDNet.default_params

    # ======================================================================= #
    def net(self, inputs,
            is_training=True,
            update_feat_shapes=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
        """SSD network definition.

        Forwards to `ssd_net` using the class count, feature layers, anchor
        sizes/ratios and normalizations stored in `self.params`.

        Args:
          inputs: network input, passed to `ssd_net` unchanged.
          is_training, dropout_keep_prob, prediction_fn, reuse, scope:
            passed to `ssd_net` unchanged.
          update_feat_shapes: when true, `self.params.feat_shapes` is
            refreshed from the first element of the `ssd_net` result.

        Returns:
          Whatever `ssd_net` returns (an indexable sequence; its first
          element is the one used for the feature-shape update).
        """
        r = ssd_net(inputs,
                    num_classes=self.params.num_classes,
                    feat_layers=self.params.feat_layers,
                    anchor_sizes=self.params.anchor_sizes,
                    anchor_ratios=self.params.anchor_ratios,
                    normalizations=self.params.normalizations,
                    is_training=is_training,
                    dropout_keep_prob=dropout_keep_prob,
                    prediction_fn=prediction_fn,
                    reuse=reuse,
                    scope=scope)
        # Update feature shapes (try at least!)
        if update_feat_shapes:
            shapes = ssd_feat_shapes_from_net(r[0], self.params.feat_shapes)
            # `_replace` builds a new params object, so the class-level
            # `default_params` is never modified through this instance.
            self.params = self.params._replace(feat_shapes=shapes)
        return r

    def arg_scope(self, weight_decay=0.0005, data_format='NHWC'):
        """Network arg_scope.

        Returns the result of `ssd_arg_scope(weight_decay, data_format)`.
        """
        return ssd_arg_scope(weight_decay, data_format=data_format)

    def arg_scope_caffe(self, caffe_scope):
        """Caffe arg_scope used for weights importing.

        Returns the result of `ssd_arg_scope_caffe(caffe_scope)`.
        """
        return ssd_arg_scope_caffe(caffe_scope)

    # ======================================================================= #
    def update_feature_shapes(self, predictions):
        """Update feature shapes from predictions collection (Tensor or Numpy
        array).

        Replaces `self.params` with a copy whose `feat_shapes` comes from
        `ssd_feat_shapes_from_net(predictions, <current feat_shapes>)`.
        """
        shapes = ssd_feat_shapes_from_net(predictions, self.params.feat_shapes)
        self.params = self.params._replace(feat_shapes=shapes)

    def anchors(self, img_shape, dtype=np.float32):
        """Compute the default anchor boxes, given an image shape.

        Forwards to `ssd_anchors_all_layers` with the feature shapes and
        anchor sizes/ratios/steps/offset stored in `self.params`.
        """
        return ssd_anchors_all_layers(img_shape,
                                      self.params.feat_shapes,
                                      self.params.anchor_sizes,
                                      self.params.anchor_ratios,
                                      self.params.anchor_steps,
                                      self.params.anchor_offset,
                                      dtype)

    def bboxes_encode(self, labels, bboxes, anchors,
                      scope=None, ignore_threshold=0.5):
        """Encode labels and bounding boxes.

        Forwards to `ssd_common.tf_ssd_bboxes_encode` with the class count,
        no-annotation label and prior scaling from `self.params`.

        Args:
          labels, bboxes, anchors: passed through unchanged.
          scope: optional scope name, passed through unchanged.
          ignore_threshold: forwarded as the helper's `ignore_threshold`
            keyword. Defaults to 0.5, the value that was previously
            hard-coded here, so existing callers are unaffected.
        """
        return ssd_common.tf_ssd_bboxes_encode(
            labels, bboxes, anchors,
            self.params.num_classes,
            self.params.no_annotation_label,
            ignore_threshold=ignore_threshold,
            prior_scaling=self.params.prior_scaling,
            scope=scope)

    def bboxes_decode(self, feat_localizations, anchors,
                      scope='ssd_bboxes_decode'):
        """Decode localization features into bounding boxes.

        Forwards to `ssd_common.tf_ssd_bboxes_decode` with the prior scaling
        from `self.params`.
        """
        return ssd_common.tf_ssd_bboxes_decode(
            feat_localizations, anchors,
            prior_scaling=self.params.prior_scaling,
            scope=scope)

    def detected_bboxes(self, predictions, localisations,
                        select_threshold=None, nms_threshold=0.5,
                        clipping_bbox=None, top_k=400, keep_top_k=200):
        """Get the detected bounding boxes from the SSD network output.

        Pipeline, in order: select -> sort/top_k -> NMS -> optional clip.

        Args:
          predictions, localisations: network outputs, passed to
            `ssd_common.tf_ssd_bboxes_select`.
          select_threshold: passed to the selection step.
          nms_threshold: passed to `tfe.bboxes_nms_batch`.
          clipping_bbox: if not `None`, the final boxes are passed through
            `tfe.bboxes_clip(clipping_bbox, ...)`.
          top_k: passed to `tfe.bboxes_sort`.
          keep_top_k: passed to `tfe.bboxes_nms_batch`.

        Returns:
          A `(rscores, rbboxes)` pair.
        """
        # Select scores and boxes from the raw predictions.
        rscores, rbboxes = \
            ssd_common.tf_ssd_bboxes_select(predictions, localisations,
                                            select_threshold=select_threshold,
                                            num_classes=self.params.num_classes)
        # Sort and keep at most top_k entries.
        rscores, rbboxes = \
            tfe.bboxes_sort(rscores, rbboxes, top_k=top_k)
        # Apply NMS algorithm.
        rscores, rbboxes = \
            tfe.bboxes_nms_batch(rscores, rbboxes,
                                 nms_threshold=nms_threshold,
                                 keep_top_k=keep_top_k)
        # Clipping is optional and happens last, after NMS.
        if clipping_bbox is not None:
            rbboxes = tfe.bboxes_clip(clipping_bbox, rbboxes)
        return rscores, rbboxes

    def losses(self, logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope='ssd_losses'):
        """Define the SSD network losses.

        Forwards every argument unchanged to `ssd_losses` and returns its
        result; no value from `self.params` is used here.
        """
        return ssd_losses(logits, localisations,
                          gclasses, glocalisations, gscores,
                          match_threshold=match_threshold,
                          negative_ratio=negative_ratio,
                          alpha=alpha,
                          label_smoothing=label_smoothing,
                          scope=scope)