  • Implementing dropout from scratch and with MXNet

    Goal:

    • Implement dropout both from scratch and with MXNet

    Dataset:

    • The load_digits() handwritten-digit dataset from scikit-learn

    Requirements:

    • One hidden layer with n_hidden1 = 36 units, ReLU activation, and softmax cross-entropy as the loss function

    Notes:

    • How the drop function is implemented (see the sketch after this list)
    • The difference between applying dropout at training time and at test time
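
    The scaling by 1/(1 - p) in the drop function below is the "inverted dropout" trick: each unit survives with probability 1 - p, so dividing the survivors by 1 - p keeps the expected activation the same as without dropout, and nothing needs to be rescaled at test time. A minimal sketch (illustrative only, not from the original post) checking this numerically:

    from mxnet import nd

    p = 0.2
    x = nd.ones(shape=(1, 100000))                         # toy activations
    mask = nd.random.uniform(0, 1, shape=x.shape) > p      # keep each unit with probability 1 - p
    dropped = mask * x / (1 - p)                           # inverted dropout: rescale the survivors

    print(x.mean().asscalar())                             # 1.0
    print(dropped.mean().asscalar())                       # ~1.0, the expectation is preserved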

    1. Implementing dropout from scratch

    from sklearn import datasets
    from mxnet import gluon,nd,autograd,init
    from mxnet.gluon import nn,data as gdata,loss as gloss,trainer
    
    # Load the handwritten-digit dataset and one-hot encode the labels
    digits = datasets.load_digits()
    features,labels = nd.array(digits['data']),nd.array(digits['target'])
    print(features.shape,labels.shape)
    labels_onehot = nd.one_hot(labels,10)
    print(labels_onehot.shape)
    
    (1797, 64) (1797,)
    (1797, 10)
    
    class NeuroNet:
        def __init__(self,n_inputs,n_hidden1,n_outputs):
            hidden_layer = Layer(n_inputs,n_hidden1)
            output_layer = Layer(n_hidden1,n_outputs)
            self.layers = [hidden_layer,output_layer]
            
            for layer in self.layers:
                for param in layer.params:
                    param.attach_grad()
        
        def softmax(self,x):
            step1 = x.exp()
            step2 = step1 / step1.sum(axis=1,keepdims=True)
            return step2
        
        def softmaxCrossEntropyLoss(self,y_pred,y):
            step1 = -y * y_pred.log()
            step2 = step1.sum(axis=1)
            loss = step2.sum(axis=0) / len(y)
            return loss
            
        def drop(self,x,drop_probability,train=True):
            '''
            Inverted dropout: each unit is dropped with probability drop_probability;
            survivors are scaled by 1/(1 - drop_probability). At test time x is returned unchanged.
            '''
            if train:
                mask = nd.random.uniform(0,1,shape=x.shape,dtype='float32') > drop_probability
                return mask * x / (1 - drop_probability)
            else:
                return x
            
        def forward(self,x,train=True):
            for layer in self.layers[:-1]:
                step1 = layer.forward(x)
                step2 = self.drop(step1,0.2,train)
                x = step2
            output_layer = self.layers[-1]
            return self.softmax(output_layer.forward(x))
        
        def sgd(self,learning_rate,batch_size):
            '''
            Update all weights and biases with mini-batch stochastic gradient descent.
            '''
            for layer in self.layers:
                layer.sgd(learning_rate,batch_size)
                    
        def dataIter(self,x,y,batch_size):
            dataset = gdata.ArrayDataset(x,y)
            return gdata.DataLoader(dataset,batch_size,shuffle=True)
        
        def fit(self,x,y,epoches,batch_size,learning_rate):
            for epoch in range(epoches):
                for x_batch,y_batch in self.dataIter(x,y,batch_size):
                    with autograd.record():
                        y_pred = self.forward(x_batch,train=True)
                        loss = self.softmaxCrossEntropyLoss(y_pred,y_batch)
                    loss.backward()
                    self.sgd(learning_rate,batch_size)
                if epoch % 50 == 0:
                    y_pred_all = self.forward(x,train=False)
                    loss_all = self.softmaxCrossEntropyLoss(y_pred_all,y)
                    accuracy_score = self.accuracyScore(y_pred_all,y)
                    print('epoch:{},loss:{},accuracy:{}'.format(epoch+50,loss_all,accuracy_score))
                
        def predict(self,x):
            y_pred = self.forward(x,train=False)  # disable dropout for inference
            return y_pred.argmax(axis=1)          # predicted class for each sample
        
        def accuracyScore(self,y_pred,y):
            acc_sum = (y_pred.argmax(axis=1) == y.argmax(axis=1)).sum().asscalar()
            return acc_sum / len(y)
            
    class Layer:
        def __init__(self,n_inputs,n_outputs):
            weight = nd.random.normal(scale=0.01,shape=(n_inputs,n_outputs))
            bias = nd.zeros(shape=(n_outputs))
            self.params = [weight,bias]
        
        def relu(self,x):
            return nd.maximum(x,0)
        
        def forward(self,x):
            step1 = nd.dot(x,self.params[0]) + self.params[1]
            return self.relu(step1)
        
        def sgd(self,learning_rate,batch_size):
            for param in self.params:
                param[:] = param - learning_rate * param.grad / batch_size 
                
        def print_params(self):
            for param in self.params:
                print(param)
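
    Before training, a quick sanity check (illustrative only, not part of the original post) makes the train/test difference of drop visible: in training mode roughly 20% of the activations are zeroed and the survivors are scaled up to 1/(1 - 0.2) = 1.25, while in test mode the input passes through unchanged.

    demo_net = NeuroNet(64, 36, 10)
    x_demo = nd.ones(shape=(2, 8))
    print(demo_net.drop(x_demo, 0.2, train=True))    # some entries 0, the rest 1.25
    print(demo_net.drop(x_demo, 0.2, train=False))   # unchanged: all ones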
    
    net = NeuroNet(64,36,10)
    net.fit(features,labels_onehot,epoches=500,batch_size=200,learning_rate=0.5)
    
    epoch:50,loss:
    [2.2988722]
    <NDArray 1 @cpu(0)>,accuracy:0.18308291597106288
    epoch:100,loss:
    [1.4126126]
    <NDArray 1 @cpu(0)>,accuracy:0.7395659432387313
    epoch:150,loss:
    [0.46316707]
    <NDArray 1 @cpu(0)>,accuracy:0.9259877573734001
    epoch:200,loss:
    [0.24678323]
    <NDArray 1 @cpu(0)>,accuracy:0.9493600445186422
    epoch:250,loss:
    [0.17839472]
    <NDArray 1 @cpu(0)>,accuracy:0.9610461880912632
    epoch:300,loss:
    [0.14298467]
    <NDArray 1 @cpu(0)>,accuracy:0.9688369504730105
    epoch:350,loss:
    [0.1198809]
    <NDArray 1 @cpu(0)>,accuracy:0.9738452977184195
    epoch:400,loss:
    [0.10388324]
    <NDArray 1 @cpu(0)>,accuracy:0.9782971619365609
    epoch:450,loss:
    [0.0917427]
    <NDArray 1 @cpu(0)>,accuracy:0.9827490261547023
    epoch:500,loss:
    [0.08237094]
    <NDArray 1 @cpu(0)>,accuracy:0.9849749582637729
    
    print('Predictions: ',net.predict(features[:10]))
    print('Ground truth:',labels[:10])
    
    Predictions: 
    [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
    <NDArray 10 @cpu(0)>
    Ground truth: 
    [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
    <NDArray 10 @cpu(0)>
    

    2. Implementing dropout with MXNet Gluon

    n_inputs = 64
    n_hiddens = 36
    n_outputs = 10
    
    # Define the model
    net = nn.Sequential()
    net.add(nn.Dense(n_hiddens,activation='relu'))
    net.add(nn.Dropout(rate=0.2))
    net.add(nn.Dense(n_outputs))
    
    # Initialize the model parameters
    net.initialize(init.Normal(sigma=0.01))
    
    # Loss function (labels are one-hot, hence sparse_label=False)
    loss = gloss.SoftmaxCrossEntropyLoss(sparse_label=False)
    
    optimizer = trainer.Trainer(net.collect_params(), 'sgd', {'learning_rate':0.5})
    
    # Train the model
    epoches = 500
    batch_size = 200
    
    dataset = gdata.ArrayDataset(features,labels_onehot)
    dataIter = gdata.DataLoader(dataset,batch_size,shuffle=True)
    for epoch in range(epoches):
        for x_batch,y_batch in dataIter:
            with autograd.record():
                y_pred = net.forward(x_batch)
                l = loss(y_pred, y_batch).sum() / batch_size
            l.backward()
            optimizer.step(batch_size)
        if epoch % 50 == 0:
            y_all_pred = net.forward(features)
            acc_sum = (y_all_pred.argmax(axis=1) == labels_onehot.argmax(axis=1)).sum().asscalar()
            print('epoch:{},loss:{},accuracy:{}'.format(epoch+50,loss(y_all_pred,labels_onehot).sum() / len(labels_onehot),acc_sum/len(y_all_pred)))
    
    epoch:50,loss:
    [2.2981045]
    <NDArray 1 @cpu(0)>,accuracy:0.16304952698942682
    epoch:100,loss:
    [0.97166663]
    <NDArray 1 @cpu(0)>,accuracy:0.867557039510295
    epoch:150,loss:
    [0.3836201]
    <NDArray 1 @cpu(0)>,accuracy:0.9243183082915971
    epoch:200,loss:
    [0.24329802]
    <NDArray 1 @cpu(0)>,accuracy:0.9449081803005008
    epoch:250,loss:
    [0.18068495]
    <NDArray 1 @cpu(0)>,accuracy:0.9577072899276572
    epoch:300,loss:
    [0.14546551]
    <NDArray 1 @cpu(0)>,accuracy:0.9660545353366722
    epoch:350,loss:
    [0.1219953]
    <NDArray 1 @cpu(0)>,accuracy:0.9727323316638843
    epoch:400,loss:
    [0.10563282]
    <NDArray 1 @cpu(0)>,accuracy:0.9760712298274903
    epoch:450,loss:
    [0.09357208]
    <NDArray 1 @cpu(0)>,accuracy:0.9788536449638287
    epoch:500,loss:
    [0.08368526]
    <NDArray 1 @cpu(0)>,accuracy:0.9816360601001669
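
    As with the from-scratch version, the trained Gluon model can be spot-checked on a few samples. A minimal sketch (not from the original post); outside of autograd.record() the Dropout layer is inactive, and argmax over the output logits gives the predicted class:

    print('Predictions: ', net(features[:10]).argmax(axis=1))
    print('Ground truth:', labels[:10])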
  • Original post: https://www.cnblogs.com/xiaobingqianrui/p/11567408.html