https://blog.csdn.net/lizzy05/article/details/80162060
from mxnet import nd

def dropout(X, drop_probability):
    keep_probability = 1 - drop_probability
    assert 0 <= keep_probability <= 1
    # In this case every element is dropped.
    if keep_probability == 0:
        return X.zeros_like()
    # Randomly select some of this layer's outputs as the dropped elements.
    mask = nd.random.uniform(
        0, 1.0, X.shape, ctx=X.context) < keep_probability
    # Scale the kept elements so that E[dropout(X)] == X.
    scale = 1 / keep_probability
    return mask * X * scale

A = nd.arange(20).reshape((5, 4))
dropout(A, 0.0)
dropout(A, 0.5)
dropout(A, 1.0)
[[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]]
<NDArray 5x4 @cpu(0)>
dropout(A, 0.5)
[[ 0. 2. 0. 0.]
[ 8. 0. 0. 0.]
[16. 0. 0. 0.]
[24. 0. 0. 0.]
[ 0. 34. 36. 38.]]
<NDArray 5x4 @cpu(0)>
dropout(A, 1.0)
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
<NDArray 5x4 @cpu(0)>
dropout(A,0.0)
[[ 0. 1. 2. 3.]
[ 4. 5. 6. 7.]
[ 8. 9. 10. 11.]
[12. 13. 14. 15.]
[16. 17. 18. 19.]]
<NDArray 5x4 @cpu(0)>
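The 1/keep_probability rescaling is what keeps the expected value of each element unchanged. As a quick sanity check (a minimal sketch, not part of the original post), averaging many independent dropout samples of A should recover something close to A itself:

# Average a large number of dropout samples of A; thanks to the
# 1/keep_probability scaling, the running mean approaches A.
n_samples = 10000
acc = nd.zeros_like(A)
for _ in range(n_samples):
    acc = acc + dropout(A, 0.5)
print(acc / n_samples)   # approximately equal to A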
import sys
sys.path.append('..')
import utils

batch_size = 256
# Load the data.
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

# Multilayer perceptron with two hidden layers.
num_inputs = 28*28
num_outputs = 10
num_hidden1 = 256
num_hidden2 = 256
weight_scale = .01

W1 = nd.random_normal(shape=(num_inputs, num_hidden1), scale=weight_scale)
b1 = nd.zeros(num_hidden1)
W2 = nd.random_normal(shape=(num_hidden1, num_hidden2), scale=weight_scale)
b2 = nd.zeros(num_hidden2)
W3 = nd.random_normal(shape=(num_hidden2, num_outputs), scale=weight_scale)
b3 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()
# Define the model with dropout layers.
drop_prob1 = 0.2
drop_prob2 = 0.5

def net(X):
    X = X.reshape((-1, num_inputs))
    # First fully connected layer.
    h1 = nd.relu(nd.dot(X, W1) + b1)
    # Apply dropout after the first fully connected layer.
    h1 = dropout(h1, drop_prob1)
    # Second fully connected layer.
    h2 = nd.relu(nd.dot(h1, W2) + b2)
    # Apply dropout after the second fully connected layer.
    h2 = dropout(h2, drop_prob2)
    return nd.dot(h2, W3) + b3
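Note that net as written applies dropout unconditionally, including when utils.evaluate_accuracy runs it on the test set. A common refinement, sketched below under the assumption that you only want dropout active while gradients are being recorded, is to gate it on mxnet.autograd.is_training() (the name net_train_aware is just for illustration):

from mxnet import autograd

def net_train_aware(X):
    X = X.reshape((-1, num_inputs))
    h1 = nd.relu(nd.dot(X, W1) + b1)
    # Only drop activations in training mode (inside autograd.record()).
    if autograd.is_training():
        h1 = dropout(h1, drop_prob1)
    h2 = nd.relu(nd.dot(h1, W2) + b2)
    if autograd.is_training():
        h2 = dropout(h2, drop_prob2)
    return nd.dot(h2, W3) + b3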
# Training.
from mxnet import autograd
from mxnet import gluon

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
learning_rate = .5

for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        utils.SGD(params, learning_rate/batch_size)
        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)
    test_acc = utils.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc))
Epoch 0. Loss: 1.134281, Train acc 0.563585, Test acc 0.762520
Epoch 1. Loss: 0.608693, Train acc 0.773304, Test acc 0.813802
Epoch 2. Loss: 0.498439, Train acc 0.815605, Test acc 0.832232
Epoch 3. Loss: 0.446880, Train acc 0.836922, Test acc 0.839243
Epoch 4. Loss: 0.420254, Train acc 0.847423, Test acc 0.839243
# Dropout with gluon.
# Define the model and add dropout layers.
from mxnet.gluon import nn

net = nn.Sequential()
drop_prob1 = 0.2
drop_prob2 = 0.5

with net.name_scope():
    net.add(nn.Flatten())
    # First fully connected layer.
    net.add(nn.Dense(256, activation="relu"))
    # Apply dropout after the first fully connected layer.
    net.add(nn.Dropout(drop_prob1))
    # Second fully connected layer.
    net.add(nn.Dense(256, activation="relu"))
    # Apply dropout after the second fully connected layer.
    net.add(nn.Dropout(drop_prob2))
    net.add(nn.Dense(10))

net.initialize()
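Unlike the scratch version above, gluon's nn.Dropout is only active during training; at prediction time it acts as an identity mapping. A quick forward-pass shape check on a dummy batch (a minimal sketch, with the batch shape chosen only for illustration) looks like this:

from mxnet import nd

# One fake batch of four 28x28 "images"; the network should return
# one score per class for each example, i.e. a (4, 10) output.
dummy = nd.random.uniform(shape=(4, 1, 28, 28))
print(net(dummy).shape)   # (4, 10)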
# Load the data and train.
import sys
sys.path.append('..')
import utils
from mxnet import nd
from mxnet import autograd
from mxnet import gluon

batch_size = 256
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),
                        'sgd', {'learning_rate': 0.5})

for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(batch_size)
        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)
    test_acc = utils.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc))
Epoch 0. Loss: 0.823313, Train acc 0.694728, Test acc 0.823217
Epoch 1. Loss: 0.512777, Train acc 0.810146, Test acc 0.845853
Epoch 2. Loss: 0.453018, Train acc 0.833267, Test acc 0.839643
Epoch 3. Loss: 0.414288, Train acc 0.849125, Test acc 0.868089
Epoch 4. Loss: 0.392432, Train acc 0.856954, Test acc 0.863582
The essence of dropout and its implementation:
Generally speaking, in ensemble learning we can sample the training set with replacement several times and train a separate classifier on each resample; at test time, the predictions of these classifiers are aggregated into the final result.
Dropout usually applies the following operations to an input layer or a hidden layer (the worked expectation after this list shows why the last step is needed):
randomly select a portion of that layer's outputs as the dropped elements;
multiply the dropped elements by 0;
scale up the elements that are not dropped.
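To see why the scaling step is needed, write the operation element-wise (this derivation is not in the original post, but it follows directly from the three steps above). With keep probability p = 1 − drop_probability, each element is kept and multiplied by 1/p with probability p, and set to 0 with probability 1 − p, so its expectation is unchanged:

E[dropout(h_i)] = p · (h_i / p) + (1 − p) · 0 = h_i

This is exactly the property the comment "E[dropout(X)] == X" in the implementation refers to.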
In fact, dropout is simulating ensemble learning. Consider that a multilayer neural network trained with dropout is essentially a sub-network of the original network (a subset of its nodes and edges); the original post illustrates one such sub-network with a figure.
We previously introduced stochastic gradient descent: when training a neural network we usually sample a random minibatch of training data.
Dropout, in effect, trains a classifier built from a sub-network of the original network on each such minibatch. Unlike ordinary ensemble learning, these sub-network classifiers all share the same set of parameters, which is why dropout only simulates ensemble learning: the sub-network classifiers are trained on different minibatches but use one shared set of parameters.
Therefore, a neural network trained with dropout effectively regularizes the parameters of the input and hidden layers: the learned parameters must make many different sub-networks of the original network perform as well as possible on the training data.
Supplement: Forward-propagation & Back-propagation
Back-propagation is the method for computing the gradients of a deep learning model's parameters. In short, back-propagation uses the chain rule from calculus to compute and store the gradients of the model's loss with respect to the intermediate variables and parameters of each layer, proceeding from the output layer, through the hidden layers nearest the output, then the hidden layers nearest the input, and finally the input layer.
The gradient computations in back-propagation may depend on the current values of the variables and parameters in each layer. Computing and storing the model's intermediate variables in the opposite order, from the input layer, through the hidden layers nearest the input, then the hidden layers nearest the output, to the output layer, is called forward-propagation.
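In MXNet the two passes are explicit in the imperative API: the forward pass runs inside autograd.record(), and backward() then applies the chain rule to produce the gradients. A minimal sketch (the toy function y = sum(2·x²) is chosen only for illustration):

from mxnet import nd, autograd

x = nd.array([1.0, 2.0, 3.0])
x.attach_grad()                 # allocate storage for the gradient of x
with autograd.record():         # forward pass: compute y and record the graph
    y = (2 * x * x).sum()
y.backward()                    # backward pass: chain rule gives dy/dx = 4x
print(x.grad)                   # [ 4.  8. 12.]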
---------------------
Author: lizzy05
Source: CSDN
Original: https://blog.csdn.net/lizzy05/article/details/80162060
Copyright notice: this is an original post by the author; please include a link to the original when reposting.