zoukankan      html  css  js  c++  java
  • SSD: Single Shot MultiBox Detector 论文解读,附代码

    一改常规套路,先上图,备注一下,这个图是盗来的

                                                                                

    文笔实在是烂透了, 想看论文解读的可以看这篇博客

    我这里就来个代码实现好了,强烈建议代码和论文配合着来看,这是挺难的一篇论文

      1 #!/usr/bin/env python
      2 # -*- coding:utf-8 -*-
      3 import tensorflow as tf
      4 import numpy as np
      5 import time
      6 
      7 class SSD(object):
      8     def __init__(self, sess):
      9         self.scales = [0.2,0.35,0.50,0.65,0.80]
     10         self.ratios = [1.,2.,3.,1./2,1./3]
     11         self.num_of_class = 21
     12         self.jaccard_val = 0.6
     13         self.background_calss_val = 0
     14         self.sess = sess
     15         self.base_input = tf.placeholder(dtype=tf.float32, shape=[None, 300, 300, 3])
     16         self.feature_list = self.build_base_net(self.base_input)
     17         self.feature_class, self.feature_location = self.reshape_and_split_feature()
     18         self.default_boxes = self.generate_default_boxes()
     19         self.num_of_default_boxes = self.default_boxes.shape[0]
     20         self.groundtruth_class = tf.placeholder(shape=[None, self.num_of_default_boxes, self.num_of_class], dtype=tf.int32)
     21         self.groundtruth_location = tf.placeholder(shape=[None, self.num_of_default_boxes, 4], dtype=tf.float32)
     22         self.groundtruth_positives = tf.placeholder(shape=[None, self.num_of_default_boxes], dtype=tf.float32)
     23         self.groundtruth_negatives = tf.placeholder(shape=[None, self.num_of_default_boxes], dtype=tf.float32)
     24         self.groundtruth_count = tf.add(self.groundtruth_positives, self.groundtruth_negatives)
     25         self.softmax_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.feature_class,
     26                                                                                     labels=self.groundtruth_class)
     27         self.loss_location = tf.div(tf.reduce_sum(tf.multiply(
     28             tf.reduce_sum(self.smooth_L1(tf.subtract(self.groundtruth_location, self.feature_location)),
     29                           reduction_indices=2), self.groundtruth_positives), reduction_indices=1),
     30                                     tf.reduce_sum(self.groundtruth_positives, reduction_indices=1))
     31         self.loss_class = tf.div(
     32             tf.reduce_sum(tf.multiply(self.softmax_cross_entropy, self.groundtruth_count), reduction_indices=1),
     33             tf.reduce_sum(self.groundtruth_count, reduction_indices=1))
     34         self.loss_all = tf.reduce_sum(tf.add(self.loss_class, self.loss_location))
     35         self.optimizer = tf.train.AdamOptimizer(0.001)
     36         self.train_op = self.optimizer.minimize(self.loss_all)
     37 
     38     def build_base_net(self,base_input):
     39         with tf.name_scope('net'):
     40             with tf.name_scope('base_net'):
     41                 base = base_input
     42                 base = tf.layers.conv2d(base, 32, 3, padding='same')
     43                 base = tf.layers.batch_normalization(base, training=True)
     44                 base = tf.nn.relu(base)
     45                 base = tf.layers.max_pooling2d(base, 3, (2, 2))
     46                 # 150,150
     47                 base = tf.layers.conv2d(base, 64, 3, padding='same')
     48                 base = tf.layers.batch_normalization(base, training=True)
     49                 base = tf.nn.relu(base)
     50                 base = tf.layers.max_pooling2d(base, 3, (2, 2))
     51                 # 75,75
     52                 base = tf.layers.conv2d(base, 128, 3, padding='same')
     53                 base = tf.layers.conv2d(base, 128, 3, padding='same')
     54                 base = tf.layers.batch_normalization(base, training=True)
     55                 base = tf.nn.relu(base)
     56                 base = tf.layers.max_pooling2d(base, 3, (2, 2))
     57                 # 37,37
     58             predict_1, down_sample_1 = self.down_sample_and_predict(base)
     59             predict_2, down_sample_2 = self.down_sample_and_predict(down_sample_1)
     60             predict_3, down_sample_3 = self.down_sample_and_predict(down_sample_2)
     61             predict_4, down_sample_4 = self.down_sample_and_predict(down_sample_3)
     62             predict_5 = self.predict_only(down_sample_4)
     63             feature_list = [predict_1, predict_2, predict_3, predict_4, predict_5]
     64 
     65         return feature_list
     66 
     67     def down_sample_and_predict(self, feature):
     68         with tf.name_scope('down_and_predict'):
     69             channels = feature.get_shape().as_list()[3]
     70             predict = tf.layers.conv2d(feature, 5*(self.num_of_class + 4), 3, padding='same')
     71             down_sample= tf.layers.conv2d(feature, 2*channels, 3, padding='same')
     72             down_sample = tf.nn.relu(down_sample)
     73             down_sample = tf.layers.max_pooling2d(down_sample, 3, strides=(2,2), padding='same')
     74             print('predictor shape :',predict.get_shape().as_list())
     75             print('down_sample shape :', down_sample.get_shape().as_list())
     76             return predict, down_sample
     77 
     78     def predict_only(self, feature):
     79         with tf.name_scope('down_and_predict'):
     80             predict = tf.layers.conv2d(feature, 5*(self.num_of_class + 4), 3, padding='same')
     81             print('predictor shape :',predict.get_shape().as_list())
     82             return predict
     83 
     84     def reshape_and_split_feature(self):
     85         feature_list = self.feature_list
     86         reshape_feature = []
     87         for feature in feature_list:
     88             width = feature.get_shape().as_list()[2]
     89             height = feature.get_shape().as_list()[1]
     90             reshape_feature.append(tf.reshape(feature, [-1, width*height*5, self.num_of_class+4]))
     91         reshape_feature = tf.concat(reshape_feature, axis=1)
     92         print('预测得到 %d 个default boxes'%reshape_feature.get_shape().as_list()[1])
     93         feature_class = reshape_feature[:,:,:self.num_of_class]
     94         feature_location = reshape_feature[:,:,self.num_of_class:]
     95         print('feature_class shape:',feature_class.get_shape().as_list())
     96         print('feature_location shape:', feature_location.get_shape().as_list())
     97         return feature_class, feature_location
     98 
     99     def generate_default_boxes(self):
    100         default_boxes = []
    101         t_start = time.time()
    102         feature_list = self.feature_list
    103         for index, feature in enumerate(feature_list):
    104             width = feature.get_shape().as_list()[2]
    105             height = feature.get_shape().as_list()[1]
    106             scale = self.scales[index]
    107             for x in range(width):
    108                 for y in range(height):
    109                     for i in range(len(self.ratios)):
    110                         top_x = x*1. / width
    111                         top_y = y*1. / height
    112                         box_width = scale * np.sqrt(self.ratios[i])
    113                         box_height = scale / np.sqrt(self.ratios[i])
    114                         default_boxes.append([top_x, top_y, box_width, box_height])
    115         t_end = time.time()
    116         print('generate %d boxes '%len(default_boxes),'takes %f seconds'%(t_end - t_start))
    117         default_boxes = np.asarray(default_boxes, dtype=np.float32)
    118         print('default_boxes shape',default_boxes.shape)
    119         return default_boxes
    120 
    121     def compute_jaccard(self, box_1, box_2):
    122         x_len = max(0, min(box_1[0]+box_1[2], box_2[0]+ box_2[2]) - max(box_1[0], box_2[0]))
    123         y_len = max(0, min(box_1[1] + box_1[3], box_2[1] + box_2[3]) - max(box_1[1], box_2[1]))
    124         inter = x_len* y_len
    125         union = box_1[2]*box_1[3] + box_2[2]*box_2[3] - inter
    126         if union == 0:
    127             return 0
    128         else:
    129             return inter / union
    130 
    131     def process_ground_truth(self, actual_input):
    132         num_of_input = len(actual_input)
    133         process_ground_truth_class = np.zeros(
                        shape=[num_of_input, self.num_of_default_boxes, self.num_of_class],
                        dtype=np.int32) 134 process_ground_truth_location = np.zeros(
                        shape=[num_of_input, self.num_of_default_boxes, 4],
                        dtype=np.float32) 135 process_ground_truth_positives = np.zeros(
                        shape=[num_of_input, self.num_of_default_boxes],
                        dtype=np.float32) 136 process_ground_truth_negatives = np.zeros_like(process_ground_truth_positives) 137 process_ground_truth_jaccard = np.zeros_like(process_ground_truth_positives) 138 for index, actual in enumerate(actual_input): 139 for actual_in in actual: 140 label = actual_in[-1:][0] 141 box_info = actual_in[:-1] 142 for box_index in range(self.num_of_default_boxes): 143 jacc = self.compute_jaccard(self.default_boxes[box_index], box_info) 144 if jacc >= self.jaccard_val: 145 process_ground_truth_class[index][box_index][label] = 1 146 process_ground_truth_location[index][box_index] = box_info 147 process_ground_truth_positives[index][box_index] = 1 148 process_ground_truth_negatives[index][box_index] = 0 149 process_ground_truth_jaccard[index][box_index] = jacc 150 151 if int(np.sum(process_ground_truth_positives[index])) == 0: 152 random_index = np.random.randint(0,self.num_of_default_boxes,1)[0] 153 process_ground_truth_class[index][random_index][0] = 1 154 process_ground_truth_location[index][random_index] = [0,0,0,0] 155 process_ground_truth_positives[index][random_index] = 1 156 process_ground_truth_negatives[index][random_index] = 0 157 process_ground_truth_jaccard[index][random_index] = self.jaccard_val 158 159 negative_count = 3*int(np.sum(process_ground_truth_positives[index])) 160 if 4*int(np.sum(process_ground_truth_positives[index])) > self.num_of_default_boxes: 161 negative_count = self.num_of_default_boxes - int(np.sum(process_ground_truth_positives[index])) 162 nega_indexs = np.random.randint(0,self.num_of_default_boxes, negative_count) 163 for nega_index in nega_indexs: 164 if process_ground_truth_jaccard[index][nega_index] < 0.3: 165 process_ground_truth_class[index][nega_index][0] = 1 166 process_ground_truth_positives[index][nega_index] = 0 167 process_ground_truth_negatives[index][nega_index] = 1 168 169 return process_ground_truth_class, process_ground_truth_location,
               process_ground_truth_positives, process_ground_truth_negatives
    170 171 def smooth_L1(self, x): 172 return tf.where(tf.less_equal(tf.abs(x),1.0), tf.multiply(0.5, tf.pow(x, 2.0)), tf.subtract(tf.abs(x), 0.5)) 173 174 sess= tf.InteractiveSession() 175 ssd = SSD(sess)

    要回去睡觉了,差不多先搞这么多

  • 相关阅读:
    感想
    正则表达式
    推送、透传、MQ
    Spring集成Quartz定时任务 ---- 定时执行
    代码优化
    nginx配置详解、端口重定向和504
    JAVA实现EXCEL导出
    js 按需加载
    MyBatis使用(二)分页查询
    MyBatis使用(一)
  • 原文地址:https://www.cnblogs.com/zxxian/p/8306334.html
Copyright © 2011-2022 走看看