# -*- coding: utf-8 -*-
2
3 import collections
4 import tensorflow as tf
5 from datetime import datetime
6 import math
7 import time
8
9 slim = tf.contrib.slim
10
11
class Block(collections.namedtuple('Block', 'scope unit_fn args')):
    '''Immutable description of one ResNet block.

    Fields:
        scope: name used for the block's variable scope.
        unit_fn: callable that builds a single residual unit.
        args: list with one entry per residual unit in the block; each entry
            is unpacked as (depth, depth_bottleneck, stride) by
            stack_blocks_dense.
    '''
14
def subsample(inputs, factor, scope=None):
    '''Spatially subsample `inputs` by `factor`.

    Args:
        inputs: the tensor to subsample.
        factor: subsampling factor. A value of 1 returns `inputs` untouched.
        scope: optional scope name forwarded to the pooling op.

    Returns:
        `inputs` itself when `factor` is 1, otherwise the result of a 1x1
        slim.max_pool2d applied with stride `factor`.
    '''
    if factor != 1:
        # A 1x1 max-pool with stride `factor` keeps every factor-th position.
        return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
    return inputs
22
23
def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
    '''Build a 2-D convolution with explicit zero padding when strided.

    Args:
        inputs: input tensor; the padding below treats axes 1 and 2 as the
            spatial axes.
        num_outputs: number of output channels.
        kernel_size: integer side length of the (square) kernel.
        stride: convolution stride.
        scope: optional scope name forwarded to slim.conv2d.

    Returns:
        The output of slim.conv2d.
    '''
    # Unit stride: the built-in 'SAME' padding is used directly.
    if stride == 1:
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
                           padding='SAME', scope=scope)

    # Strided case: pad kernel_size - 1 zeros in total on each spatial axis,
    # half (rounded down) in front and the remainder behind, then convolve
    # with 'VALID' padding.
    total_padding = kernel_size - 1
    leading = total_padding // 2
    trailing = total_padding - leading
    padded = tf.pad(inputs, [[0, 0],
                             [leading, trailing],
                             [leading, trailing],
                             [0, 0]])
    return slim.conv2d(padded, num_outputs, kernel_size, stride=stride,
                       padding='VALID', scope=scope)
43
@slim.add_arg_scope
def stack_blocks_dense(net, blocks, outputs_collections=None):
    '''Chain every residual unit of every block onto `net`.

    Args:
        net: input tensor.
        blocks: iterable of Block tuples describing the network.
        outputs_collections: collection to which each block's output is added.

    Returns:
        The tensor produced by the last unit of the last block.
    '''
    for blk in blocks:
        # Outer scope names the block; the inner scope names each unit, so
        # variables end up under "<block scope>/unit_<k>/...".
        with tf.variable_scope(blk.scope, 'block', [net]) as block_scope:
            for unit_index, unit_args in enumerate(blk.args, 1):
                with tf.variable_scope('unit_%d' % unit_index, values=[net]):
                    # Each entry of Block.args is a
                    # (depth, depth_bottleneck, stride) triple.
                    depth, bottleneck_depth, stride = unit_args
                    net = blk.unit_fn(net,
                                      depth=depth,
                                      depth_bottleneck=bottleneck_depth,
                                      stride=stride)

            # Register the block output under the block's scope name.
            net = slim.utils.collect_named_outputs(
                outputs_collections, block_scope.name, net)

    return net
73
74
def resnet_arg_scope(is_training=True,
                     weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    '''Create the arg_scope holding the default layer arguments for ResNet.

    Args:
        is_training: forwarded to batch norm as `is_training`.
        weight_decay: strength of the L2 regularizer applied to conv weights.
        batch_norm_decay: decay of the batch-norm moving averages
            (default 0.997).
        batch_norm_epsilon: epsilon passed to batch norm.
        batch_norm_scale: whether batch norm applies a learned scale.

    Returns:
        An arg_scope that sets defaults for slim.conv2d, slim.batch_norm and
        slim.max_pool2d.
    '''
    batch_norm_params = {
        'is_training': is_training,
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }

    # Convolutions default to L2-regularized, variance-scaling-initialized
    # weights followed by batch norm and ReLU.
    with slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=slim.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        # Stand-alone batch_norm calls share the same parameters.
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            # Max pooling defaults to 'SAME' padding; the innermost scope
            # object carries all the defaults set above.
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
102
@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride,
               outputs_collections=None, scope=None):
    '''Build one pre-activation bottleneck residual unit.

    Args:
        inputs: input tensor of rank 4 or more; the last dimension is read
            as the channel count.
        depth: number of output channels of the unit.
        depth_bottleneck: number of channels of the first two convolutions.
        stride: stride of the unit, applied by the 3x3 convolution and by
            the shortcut.
        outputs_collections: collection to which the unit output is added.
        scope: optional variable-scope name for the unit.

    Returns:
        The unit output, `shortcut + residual`.
    '''
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        # Number of input channels (last dimension of `inputs`).
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)

        # Pre-activation: batch norm followed by ReLU on the raw input.
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu,
                                 scope='preact')

        if depth == depth_in:
            # Channel counts already match: only subsample the raw input
            # spatially so it lines up with the strided residual branch.
            shortcut = subsample(inputs, stride, 'shortcut')
        else:
            # Channel counts differ: project the pre-activated input with a
            # strided 1x1 convolution (no normalizer, no activation).
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')

        # Residual branch: 1x1 (stride 1) -> 3x3 (stride `stride`) -> 1x1
        # (stride 1, no normalizer, no activation).
        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                               scope='conv1')
        # Use conv2d_same for the strided 3x3 convolution so that the zero
        # padding is explicit and identical for every input size, rather
        # than relying on slim.conv2d's default padding with stride > 1.
        residual = conv2d_same(residual, depth_bottleneck, 3, stride,
                               scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')

        output = shortcut + residual

        # Register the output under the unit's scope name and return it.
        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.name, output)
148
149
def resnet_v2(inputs,
              blocks,
              num_classes=None,
              global_pool=True,
              include_root_block=True,
              reuse=None,
              scope=None):
    '''Build a ResNet v2 network from a list of blocks.

    Args:
        inputs: input tensor.
        blocks: list of Block tuples describing the residual blocks.
        num_classes: number of output classes; when None, no logits layer
            and no softmax are added.
        global_pool: whether to average over axes 1 and 2 after the last
            block.
        include_root_block: whether to prepend the stride-2 7x7 convolution
            and the stride-2 3x3 max pooling.
        reuse: forwarded to tf.variable_scope as `reuse`.
        scope: name of the network's variable scope; defaults to
            'resnet_v2'.

    Returns:
        A (net, end_points) pair: the final tensor and a dict built from
        the end-points collection. When `num_classes` is given, the softmax
        output is stored under 'predictions' (and, for backward
        compatibility, also under 'prediction').
    '''
    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'

        # Make slim.conv2d, bottleneck and stack_blocks_dense add their
        # outputs to the end-points collection by default.
        with slim.arg_scope([slim.conv2d, bottleneck, stack_blocks_dense],
                            outputs_collections=end_points_collection):
            net = inputs

            if include_root_block:
                # Root convolution: 64 channels, 7x7 kernel, stride 2, with
                # neither normalizer nor activation.
                with slim.arg_scope([slim.conv2d], activation_fn=None,
                                    normalizer_fn=None):
                    net = conv2d_same(net, 64, 7, stride=2, scope='conv1')
                # Stride-2 3x3 max pooling; together with conv1 this
                # reduces the spatial size by a factor of 4.
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')

            # Stack all residual blocks, then apply the final batch norm
            # and ReLU.
            net = stack_blocks_dense(net, blocks)
            net = slim.batch_norm(net, activation_fn=tf.nn.relu,
                                  scope='postnorm')

            if global_pool:
                # Global average pooling written as a mean over axes 1, 2.
                # NOTE(review): `keep_dims` looks like the older spelling of
                # this argument - confirm against the TensorFlow version in
                # use before renaming it.
                net = tf.reduce_mean(net, [1, 2], name='pool5',
                                     keep_dims=True)

            if num_classes is not None:
                # Logits: 1x1 convolution with num_classes output channels.
                net = slim.conv2d(net, num_classes, [1, 1],
                                  activation_fn=None, normalizer_fn=None,
                                  scope='logits')

            # Turn the end-points collection into a dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)

            if num_classes is not None:
                predictions = slim.softmax(net, scope='predictions')
                # Key matches the op scope; the singular key is kept so that
                # existing readers of end_points['prediction'] keep working.
                end_points['predictions'] = predictions
                end_points['prediction'] = predictions

            return net, end_points
208
209
def resnet_v2_50(inputs,
                 num_classes=None,
                 global_pool=True,
                 reuse=None,
                 scope='resnet_v2_50'):
    '''Build the 50-layer ResNet v2.

    The four blocks hold 3, 4, 6 and 3 bottleneck units, giving
    (3 + 4 + 6 + 3) * 3 + 2 = 50 layers. The last unit of each of the first
    three blocks uses stride 2, and the final block outputs 2048 channels.

    Args:
        inputs: input tensor.
        num_classes: forwarded to resnet_v2.
        global_pool: forwarded to resnet_v2.
        reuse: forwarded to resnet_v2.
        scope: variable-scope name of the network.

    Returns:
        The (net, end_points) pair returned by resnet_v2.
    '''
    # (scope, depth, bottleneck depth, number of units, stride of last unit)
    stage_specs = [
        ('block1', 256, 64, 3, 2),
        ('block2', 512, 128, 4, 2),
        ('block3', 1024, 256, 6, 2),
        ('block4', 2048, 512, 3, 1),
    ]
    blocks = [
        Block(name, bottleneck,
              [(depth, neck, 1)] * (count - 1) + [(depth, neck, last_stride)])
        for name, depth, neck, count, last_stride in stage_specs
    ]

    return resnet_v2(inputs, blocks,
                     num_classes=num_classes,
                     global_pool=global_pool,
                     include_root_block=True,
                     reuse=reuse,
                     scope=scope)
227
def resnet_v2_101(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_101'):
    '''Build the 101-layer ResNet v2.

    The four blocks hold 3, 4, 23 and 3 bottleneck units, giving
    (3 + 4 + 23 + 3) * 3 + 2 = 101 layers. The last unit of each of the
    first three blocks uses stride 2, and the final block outputs 2048
    channels.

    Args:
        inputs: input tensor.
        num_classes: forwarded to resnet_v2.
        global_pool: forwarded to resnet_v2.
        reuse: forwarded to resnet_v2.
        scope: variable-scope name of the network.

    Returns:
        The (net, end_points) pair returned by resnet_v2.
    '''
    # (scope, depth, bottleneck depth, number of units, stride of last unit)
    stage_specs = [
        ('block1', 256, 64, 3, 2),
        ('block2', 512, 128, 4, 2),
        ('block3', 1024, 256, 23, 2),
        ('block4', 2048, 512, 3, 1),
    ]
    blocks = [
        Block(name, bottleneck,
              [(depth, neck, 1)] * (count - 1) + [(depth, neck, last_stride)])
        for name, depth, neck, count, last_stride in stage_specs
    ]

    return resnet_v2(inputs, blocks,
                     num_classes=num_classes,
                     global_pool=global_pool,
                     include_root_block=True,
                     reuse=reuse,
                     scope=scope)
245
def resnet_v2_152(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_152'):
    '''Build the 152-layer ResNet v2.

    The four blocks hold 3, 8, 36 and 3 bottleneck units, giving
    (3 + 8 + 36 + 3) * 3 + 2 = 152 layers. The last unit of each of the
    first three blocks uses stride 2, and the final block outputs 2048
    channels.

    Args:
        inputs: input tensor.
        num_classes: forwarded to resnet_v2.
        global_pool: forwarded to resnet_v2.
        reuse: forwarded to resnet_v2.
        scope: variable-scope name of the network.

    Returns:
        The (net, end_points) pair returned by resnet_v2.
    '''
    # (scope, depth, bottleneck depth, number of units, stride of last unit)
    stage_specs = [
        ('block1', 256, 64, 3, 2),
        ('block2', 512, 128, 8, 2),
        ('block3', 1024, 256, 36, 2),
        ('block4', 2048, 512, 3, 1),
    ]
    blocks = [
        Block(name, bottleneck,
              [(depth, neck, 1)] * (count - 1) + [(depth, neck, last_stride)])
        for name, depth, neck, count, last_stride in stage_specs
    ]

    return resnet_v2(inputs, blocks,
                     num_classes=num_classes,
                     global_pool=global_pool,
                     include_root_block=True,
                     reuse=reuse,
                     scope=scope)
263
def resnet_v2_200(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_200'):
    '''Build the 200-layer ResNet v2.

    The four blocks hold 3, 24, 36 and 3 bottleneck units, giving
    (3 + 24 + 36 + 3) * 3 + 2 = 200 layers. The last unit of each of the
    first three blocks uses stride 2, and the final block outputs 2048
    channels.

    Args:
        inputs: input tensor.
        num_classes: forwarded to resnet_v2.
        global_pool: forwarded to resnet_v2.
        reuse: forwarded to resnet_v2.
        scope: variable-scope name of the network.

    Returns:
        The (net, end_points) pair returned by resnet_v2.
    '''
    # (scope, depth, bottleneck depth, number of units, stride of last unit)
    stage_specs = [
        ('block1', 256, 64, 3, 2),
        ('block2', 512, 128, 24, 2),
        ('block3', 1024, 256, 36, 2),
        ('block4', 2048, 512, 3, 1),
    ]
    blocks = [
        Block(name, bottleneck,
              [(depth, neck, 1)] * (count - 1) + [(depth, neck, last_stride)])
        for name, depth, neck, count, last_stride in stage_specs
    ]

    return resnet_v2(inputs, blocks,
                     num_classes=num_classes,
                     global_pool=global_pool,
                     include_root_block=True,
                     reuse=reuse,
                     scope=scope)
281
def time_tensorflow_run(session, target, info_string):
    '''Time repeated `session.run(target)` calls and print a summary.

    The first 10 runs are warm-up and are not counted. The number of timed
    runs is read from the module-level `num_batches`, which must be defined
    before this function is called.

    Args:
        session: object whose `run(target)` method is timed.
        target: value passed to `session.run`.
        info_string: label included in the summary line.
    '''
    num_steps_burn_in = 10
    total_duration = 0.0
    total_duration_squared = 0.0

    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        session.run(target)
        duration = time.time() - start_time

        # Only runs after the warm-up phase contribute to the statistics.
        if i >= num_steps_burn_in:
            if not i % 10:
                print('%s: step %d, duration = %.3f' % (datetime.now(), i-num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration

    mn = total_duration / num_batches
    vr = total_duration_squared / num_batches - mn * mn
    # E[x^2] - E[x]^2 can come out as a tiny negative number through
    # floating-point rounding when the durations are nearly identical;
    # clamp it so math.sqrt does not raise ValueError.
    sd = math.sqrt(max(vr, 0.0))

    print('%s: %s across %d steps, %.3f +/- %3.3f sec/batch' % (datetime.now(), info_string, num_batches, mn, sd))
303
# Benchmark the forward pass of ResNet-152 on random input.
batch_size = 32
height, width = 224, 224
inputs = tf.random_uniform((batch_size, height, width, 3))
with slim.arg_scope(resnet_arg_scope(is_training=False)):
    net, end_points = resnet_v2_152(inputs, 1000)

init = tf.global_variables_initializer()

# time_tensorflow_run reads this module-level name for the number of timed
# runs, so it must be assigned before the call below.
num_batches = 100

# Use the session as a context manager so it is closed when the benchmark
# finishes, including when it fails.
with tf.Session() as sess:
    sess.run(init)
    time_tensorflow_run(sess, net, 'Forward')