  • dropout (gluon)

    https://blog.csdn.net/lizzy05/article/details/80162060

from mxnet import nd

def dropout(X, drop_probability):
    keep_probability = 1 - drop_probability
    assert 0 <= keep_probability <= 1
    # In this case every element is dropped.
    if keep_probability == 0:
        return X.zeros_like()

    # Randomly pick a subset of this layer's outputs to drop.
    mask = nd.random.uniform(
        0, 1.0, X.shape, ctx=X.context) < keep_probability
    # Rescale the kept elements so that E[dropout(X)] == X.
    scale = 1 / keep_probability
    return mask * X * scale

A = nd.arange(20).reshape((5, 4))
dropout(A, 0.0)
dropout(A, 0.5)
dropout(A, 1.0)
D:\Users\Administrator\Anaconda3\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters

    [[0. 0. 0. 0.]
    [0. 0. 0. 0.]
    [0. 0. 0. 0.]
    [0. 0. 0. 0.]
    [0. 0. 0. 0.]]
    <NDArray 5x4 @cpu(0)>

    dropout(A, 0.5)

    [[ 0. 2. 0. 0.]
    [ 8. 0. 0. 0.]
    [16. 0. 0. 0.]
    [24. 0. 0. 0.]
    [ 0. 34. 36. 38.]]
    <NDArray 5x4 @cpu(0)>

    dropout(A, 1.0)

[[0. 0. 0. 0.]
    [0. 0. 0. 0.]
    [0. 0. 0. 0.]
    [0. 0. 0. 0.]]
    <NDArray 5x4 @cpu(0)>

dropout(A, 0.0)

    [[ 0. 1. 2. 3.]
    [ 4. 5. 6. 7.]
    [ 8. 9. 10. 11.]
    [12. 13. 14. 15.]
    [16. 17. 18. 19.]]
    <NDArray 5x4 @cpu(0)>
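The rescaling by 1/keep_probability is what preserves the expectation. As a quick sanity check (my sketch, not part of the original post), averaging many dropout samples of A should approximately recover A:

# Sanity check: the sample mean of many dropout draws converges to A,
# because E[dropout(X)] == X by construction.
est = sum(dropout(A, 0.5) for _ in range(1000)) / 1000
print(est)  # each entry should be close to the corresponding entry of A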

import sys
sys.path.append('..')
import utils

batch_size = 256
# Load the data.
train_data, test_data = utils.load_data_fashion_mnist(batch_size)
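The utils module here comes from the MXNet gluon tutorials and is not shown in the post. If you don't have it on your path, minimal stand-ins for the pieces used below could look like this (a sketch under that assumption, not the original module):

from mxnet import gluon, nd

def load_data_fashion_mnist(batch_size):
    # Scale pixels to [0, 1] and cast labels to float32.
    def transform(data, label):
        return data.astype('float32') / 255, label.astype('float32')
    train = gluon.data.vision.FashionMNIST(train=True, transform=transform)
    test = gluon.data.vision.FashionMNIST(train=False, transform=transform)
    return (gluon.data.DataLoader(train, batch_size, shuffle=True),
            gluon.data.DataLoader(test, batch_size, shuffle=False))

def SGD(params, lr):
    # In-place minibatch SGD update.
    for param in params:
        param[:] = param - lr * param.grad

def accuracy(output, label):
    # Fraction of rows where the argmax matches the label.
    return nd.mean(output.argmax(axis=1) == label).asscalar()

def evaluate_accuracy(data_iterator, net):
    acc = 0.
    for data, label in data_iterator:
        acc += accuracy(net(data), label)
    return acc / len(data_iterator)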

# A multilayer perceptron with two hidden layers.
num_inputs = 28 * 28
num_outputs = 10

num_hidden1 = 256
num_hidden2 = 256
weight_scale = .01

W1 = nd.random_normal(shape=(num_inputs, num_hidden1), scale=weight_scale)
b1 = nd.zeros(num_hidden1)

W2 = nd.random_normal(shape=(num_hidden1, num_hidden2), scale=weight_scale)
b2 = nd.zeros(num_hidden2)

W3 = nd.random_normal(shape=(num_hidden2, num_outputs), scale=weight_scale)
b3 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2, W3, b3]

# Allocate a gradient buffer for every parameter.
for param in params:
    param.attach_grad()

# Define the model with dropout layers.

drop_prob1 = 0.2
drop_prob2 = 0.5

def net(X):
    X = X.reshape((-1, num_inputs))
    # First fully connected layer.
    h1 = nd.relu(nd.dot(X, W1) + b1)
    # Apply dropout after the first fully connected layer.
    h1 = dropout(h1, drop_prob1)
    # Second fully connected layer.
    h2 = nd.relu(nd.dot(h1, W2) + b2)
    # Apply dropout after the second fully connected layer.
    h2 = dropout(h2, drop_prob2)
    return nd.dot(h2, W3) + b3
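One caveat the post doesn't mention: this from-scratch dropout fires unconditionally, so it is also applied when utils.evaluate_accuracy runs net on the test set. A sketch of a training-only variant, using mxnet.autograd.is_training() (my addition, not in the original code):

from mxnet import autograd

def dropout_train_only(X, drop_probability):
    # Only drop units while recording for training; at prediction
    # time dropout should be the identity map.
    if not autograd.is_training():
        return X
    return dropout(X, drop_probability)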

# Training.
from mxnet import autograd
from mxnet import gluon

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

learning_rate = .5

for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        utils.SGD(params, learning_rate / batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)

    test_acc = utils.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss / len(train_data),
        train_acc / len(train_data), test_acc))
D:\Users\Administrator\Anaconda3\lib\site-packages\mxnet\gluon\data\vision\datasets.py:84: DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead
  label = np.fromstring(fin.read(), dtype=np.uint8).astype(np.int32)
D:\Users\Administrator\Anaconda3\lib\site-packages\mxnet\gluon\data\vision\datasets.py:88: DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead
  data = np.fromstring(fin.read(), dtype=np.uint8)
    Epoch 0. Loss: 1.134281, Train acc 0.563585, Test acc 0.762520
    Epoch 1. Loss: 0.608693, Train acc 0.773304, Test acc 0.813802
    Epoch 2. Loss: 0.498439, Train acc 0.815605, Test acc 0.832232
    Epoch 3. Loss: 0.446880, Train acc 0.836922, Test acc 0.839243
    Epoch 4. Loss: 0.420254, Train acc 0.847423, Test acc 0.839243

# gluon - dropout
# Define the model and add dropout layers.

from mxnet.gluon import nn

net = nn.Sequential()
drop_prob1 = 0.2
drop_prob2 = 0.5

with net.name_scope():
    net.add(nn.Flatten())
    # First fully connected layer.
    net.add(nn.Dense(256, activation="relu"))
    # Apply dropout after the first fully connected layer.
    net.add(nn.Dropout(drop_prob1))
    # Second fully connected layer.
    net.add(nn.Dense(256, activation="relu"))
    # Apply dropout after the second fully connected layer.
    net.add(nn.Dropout(drop_prob2))
    net.add(nn.Dense(10))
net.initialize()
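Unlike the from-scratch version above, nn.Dropout is only active in training mode; outside autograd.record() it acts as an identity map, so no manual switch is needed. A quick check (my sketch, not from the original post):

from mxnet import nd, autograd

x = nd.ones((1, 28 * 28))
with autograd.record():   # training mode: the dropout layers are active
    out_train = net(x)
out_pred = net(x)         # prediction mode: the dropout layers do nothing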

# Load the data and train.

import sys
sys.path.append('..')
import utils
from mxnet import nd
from mxnet import autograd
from mxnet import gluon

batch_size = 256
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),
                        'sgd', {'learning_rate': 0.5})

for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)

    test_acc = utils.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss / len(train_data),
        train_acc / len(train_data), test_acc))
D:\Users\Administrator\Anaconda3\lib\site-packages\mxnet\gluon\data\vision\datasets.py:84: DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead
  label = np.fromstring(fin.read(), dtype=np.uint8).astype(np.int32)
D:\Users\Administrator\Anaconda3\lib\site-packages\mxnet\gluon\data\vision\datasets.py:88: DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead
  data = np.fromstring(fin.read(), dtype=np.uint8)
    Epoch 0. Loss: 0.823313, Train acc 0.694728, Test acc 0.823217
    Epoch 1. Loss: 0.512777, Train acc 0.810146, Test acc 0.845853
    Epoch 2. Loss: 0.453018, Train acc 0.833267, Test acc 0.839643
    Epoch 3. Loss: 0.414288, Train acc 0.849125, Test acc 0.868089
    Epoch 4. Loss: 0.392432, Train acc 0.856954, Test acc 0.863582
Dropout: its essence and implementation

Generally speaking, in ensemble learning we can sample the training set with replacement several times, train a separate classifier on each sample, and at test time aggregate the classifiers' outputs into the final prediction.

Dropout usually applies the following operations to the input layer or a hidden layer (see the formula just below):

randomly select a subset of the layer's outputs as elements to drop;
multiply the dropped elements by 0;
scale up the remaining elements.
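In symbols (my summary of the three steps above, not from the original post): with drop probability $p$, each output $x_i$ is multiplied by an independent mask $m_i \sim \mathrm{Bernoulli}(1-p)$ and rescaled:

$$\hat{x}_i = \frac{m_i}{1-p}\, x_i, \qquad \mathbb{E}[\hat{x}_i] = \frac{\mathbb{E}[m_i]}{1-p}\, x_i = x_i.$$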
In fact, dropout simulates ensemble learning. A multilayer neural network trained with dropout is essentially a subnetwork of the original network, made up of a subset of its nodes and edges. (The original post includes a figure of one such sampled subnetwork here.)

We introduced stochastic gradient descent earlier: when training a neural network we generally sample a random minibatch of the training data.

Dropout, in essence, trains on each such minibatch a classifier that is a subnetwork of the original network. Unlike ordinary ensemble learning, these subnetwork classifiers all share one set of parameters, which is why dropout only simulates ensemble learning: the subnetworks are trained on different minibatches yet use the same parameters.

Consequently, a neural network that uses dropout effectively regularizes the parameters of its input and hidden layers: the learned parameters must make every subnetwork of the original network perform as well as possible on the training data.

Supplement: forward-propagation & back-propagation

Back-propagation is the method used to compute the gradients of a deep learning model's parameters. In short, it applies the chain rule from calculus to compute and store the gradients of the model's loss function with respect to the intermediate variables and parameters of each layer, proceeding from the output layer, through the hidden layers closest to the output, down to the hidden layers closest to the input and the input layer itself.

The gradient computation for a layer's variables and parameters may depend on their current values. Computing and storing the model's intermediate variables in the opposite order, from the input layer through the hidden layers to the output layer, is called forward-propagation.
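A minimal forward/backward sketch with MXNet autograd (my addition, illustrating the two passes on a one-parameter model):

from mxnet import nd, autograd

w = nd.array([2.0])
w.attach_grad()
x = nd.array([3.0])
with autograd.record():   # forward pass: compute and record y = (w*x)^2
    y = (w * x) ** 2
y.backward()              # backward pass: chain rule gives dy/dw = 2*(w*x)*x
print(w.grad)             # [36.]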
---------------------
Author: lizzy05
Source: CSDN
Original: https://blog.csdn.net/lizzy05/article/details/80162060
Copyright notice: this is the blogger's original article; please include a link to the post when reposting.
