SimpleRNNCell详解
一、总结
一句话总结:
units: 正整数,输出空间的维度,即隐藏层神经元数量.
recurrent_dropout: 隐藏层之间的dropout.
class SimpleRNNCell(Layer):
    """Cell class for SimpleRNN.

    # Arguments
        units: Positive integer, dimensionality of the output space,
            i.e. the number of hidden-layer units.
        activation: Activation function; defaults to tanh.
        use_bias: Boolean, whether to use a bias vector.
        kernel_initializer: Initializer for the input-to-hidden weights.
            Defaults to 'glorot_uniform'.
        recurrent_initializer: Initializer for the hidden-to-hidden weights.
            Defaults to 'orthogonal'.
        bias_initializer: Initializer for the bias vector.
        kernel_regularizer: Regularizer for the input-to-hidden weights.
        recurrent_regularizer: Regularizer for the hidden-to-hidden weights.
        bias_regularizer: Regularizer for the bias vector.
        kernel_constraint: Constraint applied to the kernel.
        recurrent_constraint: Constraint applied to the hidden-to-hidden
            weights.
        bias_constraint: Constraint applied to the bias vector.
        dropout: Dropout between the input and the hidden layer.
        recurrent_dropout: Dropout between hidden states.
    """
二、SimpleRNNCell详解
转自或参考:Keras源码(1):SimpleRNNCell详解
http://blog.csdn.net/u013230189/article/details/108208123
1.源码讲解
SimpleRNNCell类可以理解为RNN中一个时间步的计算,而RNN则是把多个这样的cell串联起来统一计算。
如上图所示,红色小方框就表示一个cell的计算。而外面的红色大方框则表示整个RNN的计算。
SimpleRNNCell继承自Layer基类,主要包含4个方法:
- `__init__()`:构造方法,主要用于初始化参数
- build():主要用于初始化网络层中涉及到的权重参数
- call():用于网络层的前向计算,对输入进行计算,并产生相应的输出
- get_config():获取该网络层的参数配置
具体参数和方法解释看以下源码注释:
class SimpleRNNCell(Layer):
    """Cell class for SimpleRNN: the computation of a single time step.

    For one step the cell computes
    ``activation(dot(inputs, kernel) + bias + dot(prev_state, recurrent_kernel))``
    and returns that value both as the output and as the next state.

    # Arguments
        units: Positive integer, dimensionality of the output space,
            i.e. the number of hidden-layer units.
        activation: Activation function; defaults to tanh.
        use_bias: Boolean, whether to use a bias vector.
        kernel_initializer: Initializer for the input-to-hidden weights.
            Defaults to 'glorot_uniform'.
        recurrent_initializer: Initializer for the hidden-to-hidden weights.
            Defaults to 'orthogonal'.
        bias_initializer: Initializer for the bias vector.
        kernel_regularizer: Regularizer for the input-to-hidden weights.
        recurrent_regularizer: Regularizer for the hidden-to-hidden weights.
        bias_regularizer: Regularizer for the bias vector.
        kernel_constraint: Constraint applied to the kernel.
        recurrent_constraint: Constraint applied to the hidden-to-hidden
            weights.
        bias_constraint: Constraint applied to the bias vector.
        dropout: Dropout between the input and the hidden layer.
        recurrent_dropout: Dropout between hidden states.
    """

    def __init__(self, units,
                 activation='tanh',
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 recurrent_regularizer=None,
                 bias_regularizer=None,
                 kernel_constraint=None,
                 recurrent_constraint=None,
                 bias_constraint=None,
                 dropout=0.,
                 recurrent_dropout=0.,
                 **kwargs):
        """Store the configuration; identifiers are resolved via ``.get``."""
        super(SimpleRNNCell, self).__init__(**kwargs)

        self.units = units
        self.activation = activations.get(activation)
        self.use_bias = use_bias

        # Initializers.
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.recurrent_initializer = initializers.get(recurrent_initializer)
        self.bias_initializer = initializers.get(bias_initializer)

        # Regularizers.
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)

        # Constraints.
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.recurrent_constraint = constraints.get(recurrent_constraint)
        self.bias_constraint = constraints.get(bias_constraint)

        # Clamp both dropout rates into [0, 1].
        self.dropout = min(1., max(0., dropout))
        self.recurrent_dropout = min(1., max(0., recurrent_dropout))

        # State and output both have ``units`` features.
        self.state_size = self.units
        self.output_size = self.units

        # Dropout masks are created lazily in ``call`` and then reused.
        self._dropout_mask = None
        self._recurrent_dropout_mask = None

    def build(self, input_shape):
        """Create the cell's weights from the last dimension of the input.

        ``add_weight`` registers each variable with the layer; its
        ``trainable`` parameter defaults to True, so these weights are
        updated during training.
        """
        input_dim = input_shape[-1]

        # Input-to-hidden weights.
        self.kernel = self.add_weight(
            shape=(input_dim, self.units),
            name='kernel',
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint)

        # Hidden-to-hidden weights shared across time steps.
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            name='recurrent_kernel',
            initializer=self.recurrent_initializer,
            regularizer=self.recurrent_regularizer,
            constraint=self.recurrent_constraint)

        # The bias is optional.
        self.bias = self.add_weight(
            shape=(self.units,),
            name='bias',
            initializer=self.bias_initializer,
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint) if self.use_bias else None

        # Mark the layer as built so that build() is not run again on later
        # calls; this is why the stock layers need no explicit build().
        self.built = True

    def call(self, inputs, states, training=None):
        """Run the forward computation for a single time step.

        # Arguments
            inputs: Input tensor for the current time step.
            states: List whose first element is the previous hidden state.
            training: Forwarded to the dropout-mask generator.

        # Returns
            A tuple ``(output, [output])``: the step output and the new
            state list.
        """
        prev_output = states[0]

        # Lazily create the mask applied to the inputs.
        needs_input_mask = (0 < self.dropout < 1 and
                            self._dropout_mask is None)
        if needs_input_mask:
            self._dropout_mask = _generate_dropout_mask(
                K.ones_like(inputs),
                self.dropout,
                training=training)

        # Lazily create the mask applied to the previous hidden state.
        needs_recurrent_mask = (0 < self.recurrent_dropout < 1 and
                                self._recurrent_dropout_mask is None)
        if needs_recurrent_mask:
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(prev_output),
                self.recurrent_dropout,
                training=training)

        dp_mask = self._dropout_mask
        rec_dp_mask = self._recurrent_dropout_mask

        # Apply dropout to the inputs (if any), then project with the kernel.
        masked_inputs = inputs if dp_mask is None else inputs * dp_mask
        h = K.dot(masked_inputs, self.kernel)

        # Add the bias when one is configured.
        if self.bias is not None:
            h = K.bias_add(h, self.bias)

        # Apply recurrent dropout (if any) before the hidden-to-hidden product.
        if rec_dp_mask is not None:
            prev_output *= rec_dp_mask
        output = h + K.dot(prev_output, self.recurrent_kernel)

        # Apply the activation when one is configured.
        if self.activation is not None:
            output = self.activation(output)

        # Properly set learning phase on output tensor.
        if 0 < self.dropout + self.recurrent_dropout and training is None:
            output._uses_learning_phase = True

        return output, [output]

    def get_config(self):
        """Return the cell configuration merged over the base-class config."""
        config = {
            'units': self.units,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
            'kernel_initializer':
                initializers.serialize(self.kernel_initializer),
            'recurrent_initializer':
                initializers.serialize(self.recurrent_initializer),
            'bias_initializer':
                initializers.serialize(self.bias_initializer),
            'kernel_regularizer':
                regularizers.serialize(self.kernel_regularizer),
            'recurrent_regularizer':
                regularizers.serialize(self.recurrent_regularizer),
            'bias_regularizer':
                regularizers.serialize(self.bias_regularizer),
            'kernel_constraint':
                constraints.serialize(self.kernel_constraint),
            'recurrent_constraint':
                constraints.serialize(self.recurrent_constraint),
            'bias_constraint':
                constraints.serialize(self.bias_constraint),
            'dropout': self.dropout,
            'recurrent_dropout': self.recurrent_dropout,
        }
        base_config = super(SimpleRNNCell, self).get_config()

        # Entries from this class override same-named base entries.
        merged = dict(base_config.items())
        merged.update(config.items())
        return merged
其中,build方法中使用的add_weight()方法继承自父类Layer;网络层中凡是需要参与训练的参数,都需要通过该方法来定义。
def add_weight(self,
               name,
               shape,
               dtype=None,
               initializer=None,
               regularizer=None,
               trainable=True,
               constraint=None):
    """Adds a weight variable to the layer.

    # Arguments
        name: String, the name of the weight variable.
        shape: Shape of the weight.
        dtype: Data type of the weight; falls back to ``self.dtype``
            when None.
        initializer: Initializer used to produce the initial value.
        regularizer: Optional regularizer; its value for the new weight
            is registered through ``self.add_loss``.
        trainable: Whether the weight is recorded as trainable.
        constraint: Optional constraint passed to the variable.

    # Returns
        The created weight tensor.
    """
    init_fn = initializers.get(initializer)
    dtype = self.dtype if dtype is None else dtype

    initial_value = init_fn(shape, dtype=dtype)
    weight = K.variable(initial_value,
                        dtype=dtype,
                        name=name,
                        constraint=constraint)

    # Register the regularization penalty as a layer loss.
    if regularizer is not None:
        with K.name_scope('weight_regularizer'):
            self.add_loss(regularizer(weight))

    # Track the variable in the list matching its trainable flag.
    bucket = (self._trainable_weights if trainable
              else self._non_trainable_weights)
    bucket.append(weight)
    return weight
2. 使用实例
import tensorflow as tf
import keras

# Input layout: (batch_size, time_step, embedding_dim).
batch_size = 10
time_step = 20
embedding_dim = 100
train_x = tf.random.normal(shape=[batch_size, time_step, embedding_dim])

hidden_dim = 64  # Dimensionality of the hidden state.
h0 = tf.random.normal(shape=[batch_size, hidden_dim])  # Initial hidden state.
x0 = train_x[:, 0, :]  # Input at the first time step.

# Build the cell through the public ``keras.layers`` namespace; the internal
# ``keras.layers.recurrent`` module path is not available in newer Keras
# releases.
simpleRnnCell = keras.layers.SimpleRNNCell(hidden_dim)

# Feed the current step's input together with the previous step's hidden
# state into the cell; it returns the step output and the new state list.
out, h1 = simpleRnnCell(x0, [h0])
print(out.shape, h1[0].shape)