一、这是一个传统的神经网络:
假如输入的是一张1000*1000像素点的图片,输入层v0就会有1M个节点;中间隐藏层v1设有1M个神经元,两层之间的参数就会有1M*1M=10^12个! 权值太多,计算量就会很大,需要的样本也就越大。于是,卷积神经网络出场了。
二、卷积神经网络的层级结构
从图中看出,卷积神经网络的层级结构分为卷积层(CONV)、池化层(POOL)、激励层(RELU)、全连通层(FC)。还有一个数据输入层。
三、卷积
eg1:
输入图像为5*5,卷积就是两个矩阵对应元素相乘再相加(内积):1*1+1*0+1*1+1*1+1*1=4,得出特征值;按照步长(上图为1)滑动,算出其余值。
eg2:
上图‘3’代表深度(depth):图像的RGB三个通道;图片像素点32*32;
小圆圈就是神经元(filter),每个神经元都有自己的权值矩阵,用来与输入数据卷积(相乘再相加);
权值矩阵看作是一个窗口(receptive field),按照步长(stride)滑动,逐次计算;
eg3:
输入7*7*3,(depth=3,分别对应R、G、B);神经元有2个(W0、W1)也是有3层;神经元W0对应层相卷积,再将三层计算出来的值相加+Bias b0,步长为2移动(pad1:在周围补了一圈0),得出Output0;神经元W1计算得到Output1;结果为2层。这样的好处就是权重个数减少了。
eg4:
神经元就相当于滤波器,只关注图像中的某一个特性并将它提取出来了。
四、池化层(Pooling Layer)
池化层的目的主要是压缩数据量,减少过拟合。
从原来的224*224减少到了112*112。
max pooling筛选出最大值。mean pooling筛选出平均值。
详细请参考:
https://www.cnblogs.com/skyfsm/p/6790245.html
https://www.cnblogs.com/fydeblog/p/7450413.html
五、参考代码
1 import tensorflow as tf
2 from tensorflow.examples.tutorials.mnist import input_data
3
4 mnist = input_data.read_data_sets('MNIST_data',one_hot=True)
5
6 #每个批次的大小
7 batch_size = 100
8 #计算一共有多少个批次
9 n_batch = mnist.train.num_examples // batch_size
10
11 #参数概要
12 def variable_summaries(var):
13 with tf.name_scope('summaries'):
14 mean = tf.reduce_mean(var)
15 tf.summary.scalar('mean', mean)#平均值
16 with tf.name_scope('stddev'):
17 stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
18 tf.summary.scalar('stddev', stddev)#标准差
19 tf.summary.scalar('max', tf.reduce_max(var))#最大值
20 tf.summary.scalar('min', tf.reduce_min(var))#最小值
21 tf.summary.histogram('histogram', var)#直方图
22
23 #初始化权值
24 def weight_variable(shape,name):
25 initial = tf.truncated_normal(shape,stddev=0.1)#生成一个截断的正态分布
26 return tf.Variable(initial,name=name)
27
28 #初始化偏置
29 def bias_variable(shape,name):
30 initial = tf.constant(0.1,shape=shape)
31 return tf.Variable(initial,name=name)
32
33 #卷积层
34 def conv2d(x,W):
35 #x input tensor of shape `[batch, in_height, in_width, in_channels]`
36 #W filter / kernel tensor of shape [filter_height, filter_width, in_channels, out_channels]
37 #`strides[0] = strides[3] = 1`. strides[1]代表x方向的步长,strides[2]代表y方向的步长
38 #padding: A `string` from: `"SAME", "VALID"`
39 return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')
40
41 #池化层
42 def max_pool_2x2(x):
43 #ksize [1,x,y,1]
44 return tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
45
46 #命名空间
47 with tf.name_scope('input'):
48 #定义两个placeholder
49 x = tf.placeholder(tf.float32,[None,784],name='x-input')
50 y = tf.placeholder(tf.float32,[None,10],name='y-input')
51 with tf.name_scope('x_image'):
52 #改变x的格式转为4D的向量[batch, in_height, in_width, in_channels]`
53 x_image = tf.reshape(x,[-1,28,28,1],name='x_image')
54
55
56 with tf.name_scope('Conv1'):
57 #初始化第一个卷积层的权值和偏置
58 with tf.name_scope('W_conv1'):
59 W_conv1 = weight_variable([5,5,1,32],name='W_conv1')#5*5的采样窗口,32个卷积核从1个平面抽取特征
60 with tf.name_scope('b_conv1'):
61 b_conv1 = bias_variable([32],name='b_conv1')#每一个卷积核一个偏置值
62
63 #把x_image和权值向量进行卷积,再加上偏置值,然后应用于relu激活函数
64 with tf.name_scope('conv2d_1'):
65 conv2d_1 = conv2d(x_image,W_conv1) + b_conv1
66 with tf.name_scope('relu'):
67 h_conv1 = tf.nn.relu(conv2d_1)
68 with tf.name_scope('h_pool1'):
69 h_pool1 = max_pool_2x2(h_conv1)#进行max-pooling
70
71 with tf.name_scope('Conv2'):
72 #初始化第二个卷积层的权值和偏置
73 with tf.name_scope('W_conv2'):
74 W_conv2 = weight_variable([5,5,32,64],name='W_conv2')#5*5的采样窗口,64个卷积核从32个平面抽取特征
75 with tf.name_scope('b_conv2'):
76 b_conv2 = bias_variable([64],name='b_conv2')#每一个卷积核一个偏置值
77
78 #把h_pool1和权值向量进行卷积,再加上偏置值,然后应用于relu激活函数
79 with tf.name_scope('conv2d_2'):
80 conv2d_2 = conv2d(h_pool1,W_conv2) + b_conv2
81 with tf.name_scope('relu'):
82 h_conv2 = tf.nn.relu(conv2d_2)
83 with tf.name_scope('h_pool2'):
84 h_pool2 = max_pool_2x2(h_conv2)#进行max-pooling
85
86 #28*28的图片第一次卷积后还是28*28,第一次池化后变为14*14
87 #第二次卷积后为14*14,第二次池化后变为了7*7
88 #进过上面操作后得到64张7*7的平面
89
90 with tf.name_scope('fc1'):
91 #初始化第一个全连接层的权值
92 with tf.name_scope('W_fc1'):
93 W_fc1 = weight_variable([7*7*64,1024],name='W_fc1')#上一场有7*7*64个神经元,全连接层有1024个神经元
94 with tf.name_scope('b_fc1'):
95 b_fc1 = bias_variable([1024],name='b_fc1')#1024个节点
96
97 #把池化层2的输出扁平化为1维
98 with tf.name_scope('h_pool2_flat'):
99 h_pool2_flat = tf.reshape(h_pool2,[-1,7*7*64],name='h_pool2_flat')
100 #求第一个全连接层的输出
101 with tf.name_scope('wx_plus_b1'):
102 wx_plus_b1 = tf.matmul(h_pool2_flat,W_fc1) + b_fc1
103 with tf.name_scope('relu'):
104 h_fc1 = tf.nn.relu(wx_plus_b1)
105
106 #keep_prob用来表示神经元的输出概率
107 with tf.name_scope('keep_prob'):
108 keep_prob = tf.placeholder(tf.float32,name='keep_prob')
109 with tf.name_scope('h_fc1_drop'):
110 h_fc1_drop = tf.nn.dropout(h_fc1,keep_prob,name='h_fc1_drop')
111
112 with tf.name_scope('fc2'):
113 #初始化第二个全连接层
114 with tf.name_scope('W_fc2'):
115 W_fc2 = weight_variable([1024,10],name='W_fc2')
116 with tf.name_scope('b_fc2'):
117 b_fc2 = bias_variable([10],name='b_fc2')
118 with tf.name_scope('wx_plus_b2'):
119 wx_plus_b2 = tf.matmul(h_fc1_drop,W_fc2) + b_fc2
120 with tf.name_scope('softmax'):
121 #计算输出
122 prediction = tf.nn.softmax(wx_plus_b2)
123
124 #交叉熵代价函数
125 with tf.name_scope('cross_entropy'):
126 cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=prediction),name='cross_entropy')
127 tf.summary.scalar('cross_entropy',cross_entropy)
128
129 #使用AdamOptimizer进行优化
130 with tf.name_scope('train'):
131 train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
132
133 #求准确率
134 with tf.name_scope('accuracy'):
135 with tf.name_scope('correct_prediction'):
136 #结果存放在一个布尔列表中
137 correct_prediction = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))#argmax返回一维张量中最大的值所在的位置
138 with tf.name_scope('accuracy'):
139 #求准确率
140 accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
141 tf.summary.scalar('accuracy',accuracy)
142
143 #合并所有的summary
144 merged = tf.summary.merge_all()
145
146 with tf.Session() as sess:
147 sess.run(tf.global_variables_initializer())
148 train_writer = tf.summary.FileWriter('logs/train',sess.graph)
149 test_writer = tf.summary.FileWriter('logs/test',sess.graph)
150 for i in range(1001):
151 #训练模型
152 batch_xs,batch_ys = mnist.train.next_batch(batch_size)
153 sess.run(train_step,feed_dict={x:batch_xs,y:batch_ys,keep_prob:0.5})
154 #记录训练集计算的参数
155 summary = sess.run(merged,feed_dict={x:batch_xs,y:batch_ys,keep_prob:1.0})
156 train_writer.add_summary(summary,i)
157 #记录测试集计算的参数
158 batch_xs,batch_ys = mnist.test.next_batch(batch_size)
159 summary = sess.run(merged,feed_dict={x:batch_xs,y:batch_ys,keep_prob:1.0})
160 test_writer.add_summary(summary,i)
161
162 if i%100==0:
163 test_acc = sess.run(accuracy,feed_dict={x:mnist.test.images,y:mnist.test.labels,keep_prob:1.0})
164 train_acc = sess.run(accuracy,feed_dict={x:mnist.train.images[:10000],y:mnist.train.labels[:10000],keep_prob:1.0})
165 print ("Iter " + str(i) + ", Testing Accuracy= " + str(test_acc) + ", Training Accuracy= " + str(train_acc))
2019-06-11 18:59:16