"""VGG-16 style convolutional network and a minimal TensorFlow 1.x training loop.

The network is five convolutional stages (2-2-3-3-3 conv layers with 3x3
kernels, each stage followed by 2x2 max pooling) and three fully connected
layers. The script builds the graph at import time and then trains it.
"""
# NOTE(review): "..." is the author's placeholder for the package that provides
# the data loader; replace it with the real module path before running.
from ... import input_data
import tensorflow as tf

# NOTE(review): the original called an undefined `data_read()` and later read
# batches from an undefined `mnist`. This assumes the loader lives on the
# imported module and returns an object exposing `.train.next_batch(n)` that
# yields flattened images and one-hot labels -- confirm against the real loader.
dataset = input_data.data_read()


def conv(name, x, w, b):
    """Apply a stride-1, SAME-padded 2-D convolution, add the bias, then ReLU.

    Args:
        name: Name given to the resulting ReLU op.
        x: Input tensor passed to `tf.nn.conv2d`.
        w: Convolution kernel variable.
        b: Bias variable added with `tf.nn.bias_add`.

    Returns:
        The activated feature map.
    """
    features = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(features, b), name=name)


def max_pool(name, x, k):
    """Max-pool `x` with a k x k window and stride k (SAME padding).

    Args:
        name: Name given to the pooling op.
        x: Input tensor passed to `tf.nn.max_pool`.
        k: Window size, also used as the stride, for both spatial dimensions.

    Returns:
        The pooled tensor.
    """
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME', name=name)


def fc(name, x, w, b):
    """Fully connected layer: ReLU(x @ w + b).

    Args:
        name: Name given to the resulting ReLU op.
        x: 2-D input tensor.
        w: Weight matrix variable.
        b: Bias variable.

    Returns:
        The activated layer output.
    """
    return tf.nn.relu(tf.matmul(x, w) + b, name=name)


def vgg_net(_X, _weights, _biases, keep_prob, image_shape=(224, 224, 3)):
    """Build the VGG-16 graph and return the unnormalised class scores.

    Args:
        _X: Input images, either already 4-D or flattened per example.
        _weights: Dict of weight variables keyed 'wc<stage>_<idx>' and
            'fc1'..'fc3'.
        _biases: Dict of bias variables keyed 'bc<stage>_<idx>' and
            'fb1'..'fb3'.
        keep_prob: Dropout keep probability applied after fc1 and fc2.
        image_shape: (height, width, channels) used to un-flatten `_X` when it
            is not already 4-D.

    Returns:
        Logits tensor of shape [batch, n_classes]. Softmax/argmax is left to
        the caller, since both the loss and the accuracy op below consume
        logits.
    """
    # The placeholder feeding this network is flat, so restore the 4-D layout.
    # The original indexed a 4th dimension on the 2-D input (and misspelled
    # `x_shape` as `X_shape`), which failed before any layer was built.
    if _X.get_shape().ndims != 4:
        height, width, channels = image_shape
        _X = tf.reshape(_X, shape=[-1, height, width, channels])

    # Stage 1
    conv1_1 = conv('conv1_1', _X, _weights['wc1_1'], _biases['bc1_1'])
    conv1_2 = conv('conv1_2', conv1_1, _weights['wc1_2'], _biases['bc1_2'])
    pool1 = max_pool('pool1', conv1_2, k=2)

    # Stage 2
    conv2_1 = conv('conv2_1', pool1, _weights['wc2_1'], _biases['bc2_1'])
    conv2_2 = conv('conv2_2', conv2_1, _weights['wc2_2'], _biases['bc2_2'])
    pool2 = max_pool('pool2', conv2_2, k=2)

    # Stage 3
    conv3_1 = conv('conv3_1', pool2, _weights['wc3_1'], _biases['bc3_1'])
    conv3_2 = conv('conv3_2', conv3_1, _weights['wc3_2'], _biases['bc3_2'])
    conv3_3 = conv('conv3_3', conv3_2, _weights['wc3_3'], _biases['bc3_3'])
    pool3 = max_pool('pool3', conv3_3, k=2)

    # Stage 4
    conv4_1 = conv('conv4_1', pool3, _weights['wc4_1'], _biases['bc4_1'])
    conv4_2 = conv('conv4_2', conv4_1, _weights['wc4_2'], _biases['bc4_2'])
    conv4_3 = conv('conv4_3', conv4_2, _weights['wc4_3'], _biases['bc4_3'])
    pool4 = max_pool('pool4', conv4_3, k=2)

    # Stage 5
    conv5_1 = conv('conv5_1', pool4, _weights['wc5_1'], _biases['bc5_1'])
    conv5_2 = conv('conv5_2', conv5_1, _weights['wc5_2'], _biases['bc5_2'])
    conv5_3 = conv('conv5_3', conv5_2, _weights['wc5_3'], _biases['bc5_3'])
    pool5 = max_pool('pool5', conv5_3, k=2)

    # Flatten the final feature map for the fully connected layers.
    _shape = pool5.get_shape().as_list()
    flatten = _shape[1] * _shape[2] * _shape[3]
    pool5 = tf.reshape(pool5, shape=[-1, flatten])

    fc1 = fc('fc1', pool5, _weights['fc1'], _biases['fb1'])
    fc1 = tf.nn.dropout(fc1, keep_prob)

    fc2 = fc('fc2', fc1, _weights['fc2'], _biases['fb2'])
    fc2 = tf.nn.dropout(fc2, keep_prob)

    # The output layer stays linear: no ReLU, no dropout, no softmax/argmax.
    # The original returned argmax(softmax(...)), which is non-differentiable
    # and has the wrong shape for the cross-entropy loss.
    logits = tf.nn.bias_add(tf.matmul(fc2, _weights['fc3']), _biases['fb3'],
                            name='fc3')
    return logits


# Training hyper-parameters.
learning_rate = 0.001
max_iters = 200000
batch_size = 100
display_step = 20

# Network dimensions.
n_input = 224 * 224 * 3
n_classes = 1000
dropout = 0.8  # keep probability fed during training

x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)

# NOTE(review): tf.random_normal defaults to stddev=1.0, which is very large for
# a network this deep; consider a smaller stddev or a Xavier-style initialiser.
weights = {
    'wc1_1': tf.Variable(tf.random_normal([3, 3, 3, 64])),
    'wc1_2': tf.Variable(tf.random_normal([3, 3, 64, 64])),
    'wc2_1': tf.Variable(tf.random_normal([3, 3, 64, 128])),
    'wc2_2': tf.Variable(tf.random_normal([3, 3, 128, 128])),
    'wc3_1': tf.Variable(tf.random_normal([3, 3, 128, 256])),
    'wc3_2': tf.Variable(tf.random_normal([3, 3, 256, 256])),
    'wc3_3': tf.Variable(tf.random_normal([3, 3, 256, 256])),
    'wc4_1': tf.Variable(tf.random_normal([3, 3, 256, 512])),
    'wc4_2': tf.Variable(tf.random_normal([3, 3, 512, 512])),
    'wc4_3': tf.Variable(tf.random_normal([3, 3, 512, 512])),
    'wc5_1': tf.Variable(tf.random_normal([3, 3, 512, 512])),
    'wc5_2': tf.Variable(tf.random_normal([3, 3, 512, 512])),
    'wc5_3': tf.Variable(tf.random_normal([3, 3, 512, 512])),
    'fc1': tf.Variable(tf.random_normal([7 * 7 * 512, 4096])),
    'fc2': tf.Variable(tf.random_normal([4096, 4096])),
    'fc3': tf.Variable(tf.random_normal([4096, n_classes])),
}

biases = {
    'bc1_1': tf.Variable(tf.random_normal([64])),
    'bc1_2': tf.Variable(tf.random_normal([64])),
    'bc2_1': tf.Variable(tf.random_normal([128])),
    'bc2_2': tf.Variable(tf.random_normal([128])),
    'bc3_1': tf.Variable(tf.random_normal([256])),
    'bc3_2': tf.Variable(tf.random_normal([256])),
    'bc3_3': tf.Variable(tf.random_normal([256])),
    'bc4_1': tf.Variable(tf.random_normal([512])),
    'bc4_2': tf.Variable(tf.random_normal([512])),
    'bc4_3': tf.Variable(tf.random_normal([512])),
    'bc5_1': tf.Variable(tf.random_normal([512])),
    'bc5_2': tf.Variable(tf.random_normal([512])),
    'bc5_3': tf.Variable(tf.random_normal([512])),
    # Fully connected biases: vgg_net reads these keys, but the original dict
    # never defined them.
    'fb1': tf.Variable(tf.random_normal([4096])),
    'fb2': tf.Variable(tf.random_normal([4096])),
    'fb3': tf.Variable(tf.random_normal([n_classes])),
}

pred = vgg_net(x, weights, biases, keep_prob)

# Keyword arguments are required here: TF 1.x rejects positional logits/labels.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    step = 1

    while step * batch_size < max_iters:
        batch_xs, batch_ys = dataset.train.next_batch(batch_size)
        sess.run(optimizer,
                 feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout})

        # Report progress on the current batch with dropout disabled.
        if step % display_step == 0:
            loss, acc = sess.run(
                [cost, accuracy],
                feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.0})
            print("step %d, batch loss %.4f, batch accuracy %.4f"
                  % (step, loss, acc))

        step += 1
VGGNet:
(1) 由牛津大学计算机视觉组(Visual Geometry Group)和Google DeepMind公司的研究员一起研发
(2)探索了卷积神经网络的深度与其性能之间的关系,通过反复堆叠3*3的小型卷积核和2*2的最大池化层,构建了16-19层深的卷积神经网络
(3)取得了ILSVRC2014比赛分类项目的第2名,定位项目的第1名。
(4)VGG的网络结构:
- 5段卷积层+3段全连接层
- 两个3*3的卷积层串联相当于1个5*5的卷积层,即一个像素会跟周围5*5的像素产生关联,感受野大小为5*5
- 三个3*3的卷积层串联的效果相当于1个7*7的卷积层。
- 3个串联的3*3的卷积层,比1个7*7的卷积层参数量少,只有后者的(3*3*3)/(7*7)=55%
- 3个3*3的卷积层比1个7*7的卷积层有更多的非线性变换,前者可以使用三次ReLU激活函数,后者只有一次
(5)VGG训练技巧:
- 先训练级别A的简单网络,再复用A网络的权重来初始化后面的几个复杂模型,训练收敛的速度更快。
- 训练时采用multi-scale方法做数据增强,将原始图像缩放到不同尺寸S,然后再随机裁切224*224的图片,这样能增加很多数据量,防止过拟合。
- 预测时,VGG采用Multi-scale的方法,输入图像为多尺度Q,且对于每个Q在最后一个卷积层使用滑窗的方式进行分类预测,将不同窗口的分类结果平均,再将不同尺寸Q的结果平均得到最后结果。
(6)VGG结论:
- LRN层作用不大。
- 越深的网络效果越好
- 1*1的卷积也是很有效的,但是没有3*3的卷积好,大一些的卷积核可以学习更大的空间特征。
参考资料:
《TensorFlow实战》黄文坚 唐源 著