完整项目见:Github
完整项目中最终使用了ResNet进行分类,而卷积版本较本篇中结构为了提升训练效果也略有改动
本节主要介绍进阶的卷积神经网络设计相关内容,数据读入以及增强在下一节再予介绍
网络相关参数
输入24*24的图片
卷积->relu激活->最大池化->标准化
卷积->relu激活->标准化->最大池化
全连接:reshape尺寸->384
全连接:384->192
全连接:192->10
SoftMax
网络实现
git clone https://github.com/tensorflow/models.git
cd models/tutorials/image/cifar10
下面是程序:
# Author : Hellcat
# Time   : 2017/12/8
"""Train and evaluate a small CNN on CIFAR-10 with the TensorFlow 1.x graph API.

Network layout (24x24 RGB input):
    conv -> relu -> max-pool -> LRN
    conv -> relu -> LRN -> max-pool
    fully connected: flattened -> 384
    fully connected: 384 -> 192
    fully connected: 192 -> 10 (logits; softmax is folded into the loss)
"""
import math
import os
import time

import numpy as np
import tensorflow as tf

import cifar10
# import models.tutorials.image.cifar10.cifar10 as cifar10
import cifar10_input
# import models.tutorials.image.cifar10.cifar10_input as cifar10_input

# NOTE(review): maybe_download_and_extract() is called without arguments, so
# the download location is decided inside the cifar10 module -- confirm that
# it matches data_dir below.
data_dir = './cifar-10/'
cifar10.maybe_download_and_extract()

max_steps = 3000
batch_size = 128

IMAGE_SIZE = 24
NUM_CLASSES = 10
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000


def variable_with_weight_loss(shape, stddev, wl):
    """Create a weight variable and optionally register its L2 penalty.

    The variable is initialised from a truncated normal distribution. When
    ``wl`` is not None, ``wl * l2_loss(var)`` is added to the ``'losses'``
    graph collection so that it is later summed into the total loss.

    :param shape: shape of the weight tensor
    :param stddev: standard deviation of the truncated normal initialiser
    :param wl: L2 regularisation coefficient, or None to skip the penalty
    :return: the created weight variable
    """
    var = tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    if wl is not None:
        weight_loss = tf.multiply(tf.nn.l2_loss(var), wl, name='weight_loss')
        tf.add_to_collection('losses', weight_loss)
    return var


# Input pipelines: distorted (augmented) images for training, plain images
# for evaluation.
images_train, labels_train = cifar10_input.distorted_inputs(
    data_dir=data_dir, batch_size=batch_size)
images_test, labels_test = cifar10_input.inputs(
    eval_data=True, data_dir=data_dir, batch_size=batch_size)

# Input: batches of 24x24 RGB images and their integer labels.  The batch
# dimension is fixed so that the flattened size below is known statically.
image_holder = tf.placeholder(tf.float32, [batch_size, 24, 24, 3])
label_holder = tf.placeholder(tf.int32, [batch_size])

# conv -> relu -> max-pool -> LRN
weight1 = variable_with_weight_loss(shape=[5, 5, 3, 64], stddev=5e-2, wl=0.)
bias1 = tf.Variable(tf.constant(0., shape=[64]))
kernel1 = tf.nn.conv2d(image_holder, weight1, [1, 1, 1, 1], padding='SAME')
conv1 = tf.nn.relu(tf.nn.bias_add(kernel1, bias1))
pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                       padding='SAME')
norm1 = tf.nn.lrn(pool1, 4, bias=1., alpha=0.001 / 9., beta=0.75)

# conv -> relu -> LRN -> max-pool
weight2 = variable_with_weight_loss(shape=[5, 5, 64, 64], stddev=5e-2, wl=0.)
bias2 = tf.Variable(tf.constant(0., shape=[64]))
kernel2 = tf.nn.conv2d(norm1, weight2, [1, 1, 1, 1], padding='SAME')
conv2 = tf.nn.relu(tf.nn.bias_add(kernel2, bias2))
norm2 = tf.nn.lrn(conv2, 4, bias=1., alpha=0.001 / 9., beta=0.75)
pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                       padding='SAME')

# Fully connected: flattened feature map -> 384
reshape = tf.reshape(pool2, [batch_size, -1])
# get_shape() reads the static shape recorded in the graph; .value turns the
# Dimension object into a plain int usable as a weight-matrix size.
dim = reshape.get_shape()[1].value
weight3 = variable_with_weight_loss(shape=[dim, 384], stddev=0.04, wl=0.004)
bias3 = tf.Variable(tf.constant(0.1, shape=[384]))
local3 = tf.nn.relu(tf.matmul(reshape, weight3) + bias3)

# Debug output comparing three ways of inspecting the shape: the Dimension
# object, its int value, and the tf.shape() op.
print('reshape.get_shape()[1]:', reshape.get_shape()[1],
      type(reshape.get_shape()[1]))
print('reshape.get_shape()[1].value:', reshape.get_shape()[1].value,
      type(reshape.get_shape()[1].value))
print('tf.shape(reshape):', tf.shape(reshape))

# Fully connected: 384 -> 192
weight4 = variable_with_weight_loss(shape=[384, 192], stddev=0.04, wl=0.004)
bias4 = tf.Variable(tf.constant(0.1, shape=[192]))
# tf.nn.bias_add is a special case of tf.add in which the bias must be 1-D
# and is broadcast along the last dimension of the value; tf.add supports
# general broadcasting.
local4 = tf.nn.relu(tf.nn.bias_add(tf.matmul(local3, weight4), bias4))

# Fully connected: 192 -> 10 (raw logits)
weight5 = variable_with_weight_loss(shape=[192, 10], stddev=1 / 192., wl=0.)
bias5 = tf.Variable(tf.constant(0., shape=[10]))
logits = tf.add(tf.matmul(local4, weight5), bias5)


def loss(logits, labels):
    """Build the total loss: mean softmax cross entropy plus L2 penalties.

    The mean cross entropy is added to the ``'losses'`` collection, and every
    entry of that collection (including the weight penalties registered by
    ``variable_with_weight_loss``) is summed.

    :param logits: network output before softmax
    :param labels: ground-truth class indices
    :return: scalar tensor named ``total_loss``
    """
    labels = tf.cast(labels, tf.int64)
    # The op applies softmax internally and yields one cross-entropy value
    # per example; the print below shows that shape.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels, name='cross_entropy_per_example')
    print('交叉熵:', cross_entropy.get_shape())  # (128,)
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    # tf.add_n() sums a list of tensors.
    return tf.add_n(tf.get_collection('losses'), name='total_loss')


# Bound to a separate name so the loss() function is not overwritten.
total_loss = loss(logits, label_holder)
train_op = tf.train.AdamOptimizer(1e-3).minimize(total_loss)
# Per-example boolean: is the true label among the top k predictions (k = 1)?
top_k_op = tf.nn.in_top_k(logits, label_holder, 1)

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# ---- Training ----
# Start the queue-runner threads that feed the input pipelines.
tf.train.start_queue_runners()
for step in range(max_steps):
    start_time = time.time()
    image_batch, label_batch = sess.run([images_train, labels_train])
    _, loss_value = sess.run(
        [train_op, total_loss],
        feed_dict={image_holder: image_batch, label_holder: label_batch})
    duration = time.time() - start_time
    if step % 10 == 0:
        examples_per_sec = batch_size / duration
        sec_per_batch = float(duration)
        format_str = ('step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)')
        print(format_str % (step, loss_value, examples_per_sec, sec_per_batch))

# ---- Evaluation ----
num_examples = NUM_EXAMPLES_PER_EPOCH_FOR_EVAL
num_iter = int(math.ceil(num_examples / batch_size))
true_count = 0
total_sample_count = num_iter * batch_size
for _ in range(num_iter):
    image_batch, label_batch = sess.run([images_test, labels_test])
    predictions = sess.run(
        top_k_op,
        feed_dict={image_holder: image_batch, label_holder: label_batch})
    true_count += np.sum(predictions)

# Precision uses the count accumulated over all batches, not the boolean
# array returned for the last batch.
precision = true_count / total_sample_count
print('precision @ 1 = %.3f' % precision)
TensorFlow使用总结
标准化层使用方法
tf.nn.lrn(conv2,4,bias=1.,alpha=0.001/9.,beta=0.75)
tf.nn.lrn(input,depth_radius=None,bias=None,alpha=None,beta=None,name=None)
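局部响应归一化原理是仿造生物学上活跃的神经元对相邻神经元的抑制现象(侧抑制),然后根据论文有公式如下
$$b^{i}_{x,y} = a^{i}_{x,y} \Big/ \Big(k + \alpha \sum_{j=\max(0,\,i-n/2)}^{\min(N-1,\,i+n/2)} \big(a^{j}_{x,y}\big)^{2}\Big)^{\beta}$$
其中 a,n/2,k,α,β分别表示函数中的input,depth_radius,bias,alpha,beta,N 为通道总数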
L2正则化添加方法
weight_loss = tf.multiply(tf.nn.l2_loss(var),wl,name='weight_loss')
tf.add_to_collection('losses', weight_loss)
tf.add_n(tf.get_collection('losses'),name='total_loss')
点乘&矩阵乘
tf.multiply和tf.matmul区别
解析:
(1)tf.multiply是点乘,即Returns x * y element-wise.
(2)tf.matmul是矩阵乘法,即Multiplies matrix a by matrix b, producing a * b.
几种加法
tf.nn.bias_add 是 tf.add 的一个特例
tf.nn.bias_add 要求 bias 必须是一维张量,且其长度与 value 的最后一个维度一致,相加时沿最后一个维度广播。
tf.add 则支持一般的 broadcasting(广播机制),两个操作数只要形状可以广播即可,并不限定第二个操作数是一维。
tf.add_n():多项连加 return tf.add_n(tf.get_collection('losses'),name='total_loss')
softmax交叉熵
tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=labels, name='cross_entropy_per_example')
先对 logits 做 softmax,再计算每个样本各自的交叉熵,输出形状为 (batch_size,),即每张图片对应一个交叉熵值
输出top_k准确率
tf.nn.in_top_k(logits, label_holder, 1)
最后一个参数是k
获取尺寸
tf.shape(x)
tf.shape()中x数据类型可以是tensor,list,array,返回是一个tensor.
shape=tf.placeholder(tf.float32, shape=[None, 227,227,3] )
我们经常会这样来 feed 数据,如果在运行的时候想知道 None 到底是多少,这时候只能通过 tf.shape(x)[0] 这种方式来获得。
tensor.get_shape()
只有 tensor 有这个方法,返回的是一个 TensorShape 对象(静态形状,可用 .as_list() 转为 list),而不是普通的 tuple。
输入,
# Dimension object taken from the static shape.
print('reshape.get_shape()[1]:', reshape.get_shape()[1], type(reshape.get_shape()[1]))
# get_shape() reports the static shape; .value converts the Dimension to a plain int.
print('reshape.get_shape()[1].value:', reshape.get_shape()[1].value, type(reshape.get_shape()[1].value))
# tf.shape() returns a tensor that is only evaluated when the graph runs.
print('tf.shape(reshape):',tf.shape(reshape))
输出,
reshape.get_shape()[1]: 2304 <class 'tensorflow.python.framework.tensor_shape.Dimension'>
reshape.get_shape()[1].value: 2304 <class 'int'>
tf.shape(reshape): Tensor("Shape_2:0", shape=(2,), dtype=int32)
张量切片
tf.slice
解析:slice(input_, begin, size, name=None):Extracts a slice from a tensor.
假设input为[[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 6, 6]]],如下所示:
(1)tf.slice(input, [1, 0, 0], [1, 1, 3]) ==> [[[3, 3, 3]]]
(2)tf.slice(input, [1, 0, 0], [1, 2, 3]) ==> [[[3, 3, 3], [4, 4, 4]]]
(3)tf.slice(input, [1, 0, 0], [2, 1, 3]) ==> [[[3, 3, 3]], [[5, 5, 5]]]
tf.cast(tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)
在看cifar10的例子的时候,必然会看到一个函数,官方给的文档注释长而晦涩,基本等于0.网上也有这个函数,但解释差劲或者基本没有解释,函数的原型是酱紫的.
def strided_slice(input_, begin, end, strides=None, begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=0, var=None, name=None): """Extracts a strided slice from a tensor.
'input'= [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 6, 6]]]
来把输入变个型,可以看成3维的tensor,从外向内依次为第1,2,3维
[[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 6, 6]]]
以tf.strided_slice(input, [0,0,0], [2,2,2], [1,2,1])调用为例,start = [0,0,0] , end = [2,2,2], stride = [1,2,1],求一个[start, end)的一个片段,注意end为开区间
第1维 start = 0 , end = 2, stride = 1, 所以取 0 , 1行,此时的输出
output1=
[[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
第2维时, start = 0 , end = 2 , stride = 2, 所以只能取0行,此时的输出
output2=
[[[1, 1, 1]], [[3, 3, 3]]]
第3维的时候,start = 0, end = 2, stride = 1, 可以取0,1行,此时得到的就是最后的输出
[[[1, 1]], [[3, 3]]]
整理之后最终的输出(形状为 2×1×2)为:
[[[1, 1]], [[3, 3]]]
类似代码如下:
- import tensorflow as tf
- data = [[[1, 1, 1], [2, 2, 2]],  # 3 x 2 x 3 nested list used as the input
- [[3, 3, 3], [4, 4, 4]],
- [[5, 5, 5], [6, 6, 6]]]
- x = tf.strided_slice(data,[0,0,0],[1,1,1])  # begin=[0,0,0], end=[1,1,1], strides omitted; NOTE(review): with an exclusive end this should select only element [0][0][0] -- confirm
- with tf.Session() as sess:  # the slice is only computed when run inside a session
- print(sess.run(x))  # evaluate x and print the result