tflearn里 例子
# Building convolutional network
network = input_data(shape=[None, 28, 28, 1], name='input')

# Two conv -> max-pool -> local-response-normalization stages that differ
# only in the second conv_2d argument (32, then 64).
for n_filters in (32, 64):
    network = conv_2d(network, n_filters, 3, activation='relu', regularizer="L2")
    network = max_pool_2d(network, 2)
    network = local_response_normalization(network)

# Two tanh fully connected stages (128, then 256 units), each followed by
# dropout with the same 0.8 argument.
for n_units in (128, 256):
    network = fully_connected(network, n_units, activation='tanh')
    network = dropout(network, 0.8)

# 10-way softmax output wrapped in a regression (training) layer.
network = fully_connected(network, 10, activation='softmax')
network = regression(network, optimizer='adam', learning_rate=0.01,
                     loss='categorical_crossentropy', name='target')
Batch Normalization也应该如此吧???我看 LRN和BN都在一块。 官方文档。 这是一个尝试例子,不过遇到了一些问题 这里有一个提问和解答但是没有太懂。 知乎上有对于TensorFlow使用BN的讨论,因为其需要参数mean, variance
Batch Normalization The Easy Way

Perhaps the easiest way to use batch normalization would be to simply use the tf.contrib.layers.batch_norm layer. So let’s give that a go! Let’s get some imports and data loading out of the way first.

    import numpy as np
    import tensorflow as tf
    from tensorflow.examples.tutorials.mnist import input_data
    from utils import show_graph
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Next, we define our typical fully-connected + batch normalization + nonlinearity set-up:

    def dense(x, size, scope):
        return tf.contrib.layers.fully_connected(x, size, activation_fn=None, scope=scope)

    def dense_batch_relu(x, phase, scope):
        with tf.variable_scope(scope):
            h1 = tf.contrib.layers.fully_connected(x, 100, activation_fn=None, scope='dense')
            h2 = tf.contrib.layers.batch_norm(h1, center=True, scale=True, is_training=phase, scope='bn')
            return tf.nn.relu(h2, 'relu')

One thing that might stand out is the phase term. We are going to use it as a placeholder for a boolean which we will insert into feed_dict. It will serve as a binary indicator for whether we are in training (phase=True) or testing (phase=False) mode.
Just to add to the list, there're several more ways to do batch-norm in tensorflow:
- tf.nn.batch_normalization is a low-level op. The caller is responsible for handling the mean and variance tensors themselves.
- tf.nn.fused_batch_norm is another low-level op, similar to the previous one. The difference is that it's optimized for 4D input tensors, which is the usual case in convolutional neural networks. tf.nn.batch_normalization accepts tensors of any rank greater than 1.
- tf.layers.batch_normalization is a high-level wrapper over the previous ops. The biggest difference is that it takes care of creating and managing the running mean and variance tensors, and calls a fast fused op when possible. Usually, this should be the default choice for you.
# Neural-network model with L2 regularization and batch normalization.
# SGD is used for training to save time; the code is adapted from Assignment 2.
# `beta` controls the level of L2 regularization (0.001 was the best value the
# author found). L2 is applied to both the weights and the biases of all layers.
# NOTE: image_size, num_labels, the *_dataset / *_labels arrays and accuracy()
# are expected to be defined earlier (they are not defined in this snippet).
batch_size = 128
beta = 0.001

# Build the TensorFlow graph.
graph = tf.Graph()
with graph.as_default():
    # Input data. The training data uses placeholders that are fed at run
    # time with one training minibatch.
    tf_train_dataset = tf.placeholder(tf.float32,
                                      shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Batch-normalize the raw inputs.
    tf_train_dataset_bn = tf.contrib.layers.batch_norm(tf_train_dataset)

    # Hidden layer: number of hidden neurons, its weights and its biases.
    num_hidden_neurons = 1024
    hidden_weights = tf.Variable(
        tf.truncated_normal([image_size * image_size, num_hidden_neurons]))
    hidden_biases = tf.Variable(tf.zeros([num_hidden_neurons]))

    # The layer itself: multiply the data by the weights, add the biases and
    # take ReLU over the result.
    hidden_layer = tf.nn.relu(
        tf.matmul(tf_train_dataset_bn, hidden_weights) + hidden_biases)

    # Batch-normalize the hidden activations.
    hidden_layer_bn = tf.contrib.layers.batch_norm(hidden_layer)

    # Output linear layer: the weights connect hidden neurons to output
    # labels, and the biases are added to the output labels.
    out_weights = tf.Variable(
        tf.truncated_normal([num_hidden_neurons, num_labels]))
    out_biases = tf.Variable(tf.zeros([num_labels]))

    # Logits.
    out_layer = tf.matmul(hidden_layer_bn, out_weights) + out_biases

    # The real output is a softmax of the logits; its cross-entropy plus the
    # L2 penalties is the loss. logits/labels are passed by keyword because
    # TensorFlow >= 1.0 rejects positional arguments for this op.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=out_layer, labels=tf_train_labels)
        + beta * tf.nn.l2_loss(hidden_weights)
        + beta * tf.nn.l2_loss(hidden_biases)
        + beta * tf.nn.l2_loss(out_weights)
        + beta * tf.nn.l2_loss(out_biases))

    # Minimize the loss to actually train the network.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions for the training, validation, and test data.
    # NOTE(review): the validation/test paths below feed the raw datasets
    # straight into the hidden layer and skip both batch_norm layers, so they
    # do not compute the same function as the training path - confirm this
    # is intended.
    train_prediction = tf.nn.softmax(out_layer)
    valid_relu = tf.nn.relu(
        tf.matmul(tf_valid_dataset, hidden_weights) + hidden_biases)
    valid_prediction = tf.nn.softmax(
        tf.matmul(valid_relu, out_weights) + out_biases)
    test_relu = tf.nn.relu(
        tf.matmul(tf_test_dataset, hidden_weights) + hidden_biases)
    test_prediction = tf.nn.softmax(
        tf.matmul(test_relu, out_weights) + out_biases)

# Actual training of the network built above: run for num_steps steps and
# report progress every 500 steps.
num_steps = 3001

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print("Initialized")
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Dictionary telling the session where to feed the minibatch: each key
        # is a placeholder node of the graph, each value the numpy array to
        # feed to it.
        feed_dict = {tf_train_dataset: batch_data,
                     tf_train_labels: batch_labels}
        # Run one optimization step and fetch the loss and predictions.
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 500 == 0):
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(
                valid_prediction.eval(), valid_labels))
    # Reported once, after the training loop has finished.
    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
Batch Normalization 会使你的参数搜索问题变得很容易,使神经网络对超参数的选择更加稳定,超参数的范围会更加庞大,工作效果也很好,也会使你的训练更加容易,甚至是深层网络。
在神经网络中,已知一些中间值,假设你有一些隐藏单元值,从 $z^{(1)}$ 到 $z^{(m)}$,这些来源于隐藏层,所以这样写会更准确,即 $z^{[l](i)}$ 为第 $l$ 个隐藏层的值,$i$ 从 1 到 $m$。

高级 封装函数tf.layers.batch_normalization
怎么加入batch normalization
- 全连接层
- 卷积层
- 加入 is_training 参数
- 从全连接层中移除激活函数和bias
- 使用 tf.layers.batch_normalization 函数归一化层的输出
def fully_connected(prev_layer, num_units, is_training):
    """
    Create a fully connected layer with the given layer as input and the given number of neurons.

    The layer is dense (no bias, no activation) -> batch normalization -> ReLU.

    :param prev_layer: Tensor
        The Tensor that acts as input into this layer
    :param num_units: int
        The size of the layer. That is, the number of units, nodes, or neurons.
    :param is_training: bool or Tensor
        Indicates whether or not the network is currently training, which tells the batch normalization
        layer whether or not it should update or use its population statistics.
    :returns Tensor
        A new fully connected layer
    """
    # The dense layer is created without a bias or activation; the
    # activation is applied after batch normalization instead.
    layer = tf.layers.dense(prev_layer, num_units, use_bias=False, activation=None)
    layer = tf.layers.batch_normalization(layer, training=is_training)
    layer = tf.nn.relu(layer)
    return layer
然后是卷积层加入batch normalization
- 加入 is_training 参数
- 从卷积层中移除激活函数和bias
- 使用 tf.layers.batch_normalization 函数归一化层的输出
- 传递归一化后的值给激活函数
比较两者的区别,当你使用tf.layers时,对全连接层和卷积层时基本没有区别,使用tf.nn的时候,会有一些不同 。
一般来说,人们同意去掉层的bias(因为batch normalization本身已经包含缩放和平移项),并在层的非线性激活函数之前添加batch normalization。然而,对一些网络来说,使用其他方式也能很好工作。
- 添加is_training ,一个占位符储存布尔量,表示网络是否在训练。
- 传递is_training给卷积层和全连接层
- 每次调用,都要给feed_dict传递合适的值
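- 将train_opt放入 with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): 语句块中,保证训练时会更新 population statistics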
- 加入 is_training 参数
- 去除bias 以及激活函数
- 添加 gamma,beta,pop_mean,pop_variance变量
- 使用 tf.cond处理训练与测试的不同
- tf.nn.moments计算均值和方差。with tf.control_dependencies... 更新population statistics,tf.nn.batch_normalization 归一化层的输出
- 在测试时,用tf.nn.batch_normalization归一化层的输出,使用训练时候的population statistics
def fully_connected(prev_layer, num_units, is_training):
    """
    Create a fully connected layer with the given layer as input and the given number of neurons.

    Batch normalization is implemented by hand with tf.nn ops: the layer keeps
    its own population mean/variance variables and switches between batch and
    population statistics with tf.cond.

    :param prev_layer: Tensor
        The Tensor that acts as input into this layer
    :param num_units: int
        The size of the layer. That is, the number of units, nodes, or neurons.
    :param is_training: bool or Tensor
        Indicates whether or not the network is currently training, which tells the batch normalization
        layer whether or not it should update or use its population statistics.
    :returns Tensor
        A new fully connected layer
    """
    # No bias and no activation here; ReLU is applied after normalization.
    layer = tf.layers.dense(prev_layer, num_units, use_bias=False, activation=None)

    # Scale and shift applied by tf.nn.batch_normalization.
    gamma = tf.Variable(tf.ones([num_units]))
    beta = tf.Variable(tf.zeros([num_units]))

    # Running population statistics; updated by assignment, not by the
    # optimizer, hence trainable=False.
    pop_mean = tf.Variable(tf.zeros([num_units]), trainable=False)
    pop_variance = tf.Variable(tf.ones([num_units]), trainable=False)

    epsilon = 1e-3

    def batch_norm_training():
        """Normalize with the batch statistics and update the population statistics."""
        batch_mean, batch_variance = tf.nn.moments(layer, [0])

        # Moving average of the batch statistics.
        decay = 0.99
        train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
        train_variance = tf.assign(pop_variance, pop_variance * decay + batch_variance * (1 - decay))

        # The control dependency makes both assignments run whenever the
        # normalized output is evaluated.
        with tf.control_dependencies([train_mean, train_variance]):
            return tf.nn.batch_normalization(layer, batch_mean, batch_variance, beta, gamma, epsilon)

    def batch_norm_inference():
        """Normalize with the stored population statistics."""
        return tf.nn.batch_normalization(layer, pop_mean, pop_variance, beta, gamma, epsilon)

    batch_normalized_output = tf.cond(is_training, batch_norm_training, batch_norm_inference)
    return tf.nn.relu(batch_normalized_output)
def conv_layer(prev_layer, layer_depth, is_training):
    """
    Create a convolutional layer with the given layer as input.

    The layer is conv2d (no bias) -> hand-written batch normalization -> ReLU.

    :param prev_layer: Tensor
        The Tensor that acts as input into this layer
    :param layer_depth: int
        We'll set the strides and number of feature maps based on the layer's depth in the network.
        This is *not* a good way to make a CNN, but it helps us create this example with very little code.
    :param is_training: bool or Tensor
        Indicates whether or not the network is currently training, which tells the batch normalization
        layer whether or not it should update or use its population statistics.
    :returns Tensor
        A new convolutional layer
    """
    # Every third layer uses stride 2; all others use stride 1.
    strides = 2 if layer_depth % 3 == 0 else 1

    # The channel count is read from index 3 of the input's static shape.
    in_channels = prev_layer.get_shape().as_list()[3]
    out_channels = layer_depth*4

    weights = tf.Variable(
        tf.truncated_normal([3, 3, in_channels, out_channels], stddev=0.05))

    layer = tf.nn.conv2d(prev_layer, weights, strides=[1,strides, strides, 1], padding='SAME')

    # Scale and shift applied by tf.nn.batch_normalization.
    gamma = tf.Variable(tf.ones([out_channels]))
    beta = tf.Variable(tf.zeros([out_channels]))

    # Running population statistics; updated by assignment, not by the
    # optimizer, hence trainable=False.
    pop_mean = tf.Variable(tf.zeros([out_channels]), trainable=False)
    pop_variance = tf.Variable(tf.ones([out_channels]), trainable=False)

    epsilon = 1e-3

    def batch_norm_training():
        """Normalize with the batch statistics and update the population statistics."""
        # Important to use the correct dimensions here to ensure the mean and variance are calculated
        # per feature map instead of for the entire layer.
        # (The explicit keep_dims=False was dropped: it equals the default.)
        batch_mean, batch_variance = tf.nn.moments(layer, [0,1,2])

        # Moving average of the batch statistics.
        decay = 0.99
        train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
        train_variance = tf.assign(pop_variance, pop_variance * decay + batch_variance * (1 - decay))

        # The control dependency makes both assignments run whenever the
        # normalized output is evaluated.
        with tf.control_dependencies([train_mean, train_variance]):
            return tf.nn.batch_normalization(layer, batch_mean, batch_variance, beta, gamma, epsilon)

    def batch_norm_inference():
        """Normalize with the stored population statistics."""
        return tf.nn.batch_normalization(layer, pop_mean, pop_variance, beta, gamma, epsilon)

    batch_normalized_output = tf.cond(is_training, batch_norm_training, batch_norm_inference)
    return tf.nn.relu(batch_normalized_output)
在 train 函数中我们不用再添加 with tf.control_dependencies...,因为我们已经在全连接层和卷积层中手动更新了 population statistics。
def train(num_batches, batch_size, learning_rate):
    """
    Build the network, train it on MNIST and print loss/accuracy along the way.

    Relies on `mnist`, `conv_layer` and `fully_connected` being defined at
    module level.

    :param num_batches: int
        Number of training batches (optimizer steps) to run.
    :param batch_size: int
        Number of samples requested from mnist.train.next_batch per step.
    :param learning_rate: float
        Learning rate passed to the Adam optimizer.
    """
    # Build placeholders for the input samples and labels
    inputs = tf.placeholder(tf.float32, [None, 28, 28, 1])
    labels = tf.placeholder(tf.float32, [None, 10])

    # Add placeholder to indicate whether or not we're training the model
    is_training = tf.placeholder(tf.bool)

    # Feed the inputs into a series of convolutional layers;
    # range(1, 20) yields 19 layers with depths 1..19.
    layer = inputs
    for layer_i in range(1, 20):
        layer = conv_layer(layer, layer_i, is_training)

    # Flatten the output from the convolutional layers
    orig_shape = layer.get_shape().as_list()
    layer = tf.reshape(layer, shape=[-1, orig_shape[1] * orig_shape[2] * orig_shape[3]])

    # Add one fully connected layer
    layer = fully_connected(layer, 100, is_training)

    # Create the output layer with 10 units, matching the label width
    logits = tf.layers.dense(layer, 10)

    # Define loss and training operations
    model_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
    train_opt = tf.train.AdamOptimizer(learning_rate).minimize(model_loss)

    # Create operations to test accuracy
    correct_prediction = tf.equal(tf.argmax(logits,1), tf.argmax(labels,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Train and test the network
    with tf.Session() as sess:
        # Variables must be initialized before any op that reads them runs.
        sess.run(tf.global_variables_initializer())
        for batch_i in range(num_batches):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)

            # train this batch
            sess.run(train_opt, {inputs: batch_xs, labels: batch_ys, is_training: True})

            # Periodically check the validation or training loss and accuracy
            if batch_i % 100 == 0:
                loss, acc = sess.run([model_loss, accuracy], {inputs: mnist.validation.images,
                                                              labels: mnist.validation.labels,
                                                              is_training: False})
                print('Batch: {:>2}: Validation loss: {:>3.5f}, Validation accuracy: {:>3.5f}'.format(batch_i, loss, acc))
            elif batch_i % 25 == 0:
                loss, acc = sess.run([model_loss, accuracy], {inputs: batch_xs, labels: batch_ys, is_training: False})
                print('Batch: {:>2}: Training loss: {:>3.5f}, Training accuracy: {:>3.5f}'.format(batch_i, loss, acc))

        # At the end, score the final accuracy for both the validation and test sets
        acc = sess.run(accuracy, {inputs: mnist.validation.images,
                                  labels: mnist.validation.labels,
                                  is_training: False})
        print('Final validation accuracy: {:>3.5f}'.format(acc))
        acc = sess.run(accuracy, {inputs: mnist.test.images,
                                  labels: mnist.test.labels,
                                  is_training: False})
        print('Final test accuracy: {:>3.5f}'.format(acc))

        # Score the first 100 test images individually, just to make sure batch normalization really worked
        correct = 0
        for i in range(100):
            correct += sess.run(accuracy, feed_dict={inputs: [mnist.test.images[i]],
                                                     labels: [mnist.test.labels[i]],
                                                     is_training: False})

        print("Accuracy on 100 samples:", correct/100)
# Hyperparameters for the training run.
num_batches = 800
batch_size = 64
learning_rate = 0.002

# Build and train the model inside a fresh default graph.
with tf.Graph().as_default():
    train(num_batches, batch_size, learning_rate)