Implementing a BP Neural Network with numpy

    In this post we use only numpy to build a simple neural network with an input layer, one hidden layer, and an output layer. We choose sigmoid as the activation function and the squared-error loss, and finally train and test on the MNIST dataset.

    1. Formula Derivation

    Mean squared error loss:

    \[ loss = J(W,b,x,y) = \frac{1}{2}\|a^L - y\|^2 \]

    Forward propagation:

    \[ z^l = W^l a^{l-1} + b^l \\ a^l = \sigma(z^l) \]

    Backpropagation:

    \[ \frac{\partial J}{\partial W^l} = \delta^l (a^{l-1})^T \\ \frac{\partial J}{\partial b^l} = \delta^l \\ \delta^l = [(W^{l+1})^T \delta^{l+1}] \odot \sigma'(z^l) \\ \delta^L = (a^L - y) \odot \sigma'(z^L) \]
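
    The last relation is just the chain rule applied at the output layer: since \(J = \frac{1}{2}\|a^L - y\|^2\) and \(a^L = \sigma(z^L)\),

    \[ \delta^L \equiv \frac{\partial J}{\partial z^L} = \frac{\partial J}{\partial a^L} \odot \frac{\partial a^L}{\partial z^L} = (a^L - y) \odot \sigma'(z^L) \]

    and each earlier \(\delta^l\) follows by propagating \(\delta^{l+1}\) back through \(z^{l+1} = W^{l+1} a^l + b^{l+1}\).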

    2. Utility Functions

    These include the sigmoid activation function, its first derivative, and a function that one-hot encodes the labels:

    # one-hot encode the labels
    def onehot(targets, num):
        result = np.zeros((num, 10))
        for i in range(num):
            result[i][targets[i]] = 1
        return result
    
    # sigmoid activation
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))
    
    # first derivative of sigmoid
    def Dsigmoid(x):
        return sigmoid(x)*(1-sigmoid(x))
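
    One caveat: for inputs that are large in magnitude and negative, np.exp(-x) overflows float64 and numpy emits a runtime warning (the result still rounds to the correct limit of 0, so training works regardless). A numerically stable variant is easy to write; the sketch below is our own addition, not part of the original code:

    # Numerically stable sigmoid (sketch): use exp(x)/(1+exp(x)) for negative
    # inputs so that np.exp never receives a large positive argument.
    def stable_sigmoid(x):
        x = np.asarray(x, dtype=float)
        out = np.empty_like(x)
        pos = x >= 0
        out[pos] = 1 / (1 + np.exp(-x[pos]))
        ex = np.exp(x[~pos])
        out[~pos] = ex / (1 + ex)
        return out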
    
    

    3. Neural Network Implementation

    In the code, self.d2 and d1 correspond to the \(\delta\) terms in the formulas above; all other variables are named directly after the formulas. Note that the weights are stored with shape (inputs, outputs), so the forward pass computes X·W rather than the column-vector form W·a used in the derivation. The code is as follows:

    class NN(object):
        def __init__(self, l0, l1, l2):
            self.lr = 0.1                                        # learning rate
            self.W1 = np.random.randn(l0, l1) * 0.01             # small random initialization
            self.b1 = np.random.randn(l1) * 0.01
            self.W2 = np.random.randn(l1, l2) * 0.01
            self.b2 = np.random.randn(l2) * 0.01
    
        # forward pass
        def forward(self, X, y):
            self.X = X                                           # m x 784
            self.z1 = np.dot(X, self.W1) + self.b1               # m x 500; 500 is the hidden-layer width
            self.a1 = sigmoid(self.z1)
            self.z2 = np.dot(self.a1, self.W2) + self.b2         # m x 10
            self.a2 = sigmoid(self.z2)
            loss = np.sum((self.a2 - y) * (self.a2 - y)) / 2     # squared-error loss, summed over the batch
            self.d2 = (self.a2 - y) * Dsigmoid(self.z2)          # m x 10, output-layer delta, used in backpropagation
            return loss, self.a2
    
        # backward pass
        def backward(self):
            dW2 = np.dot(self.a1.T, self.d2) / 3                  # 500 x 10; average over batch size 3
            db2 = np.sum(self.d2, axis=0) / 3                     # 10
            d1 = np.dot(self.d2, self.W2.T) * Dsigmoid(self.z1)   # m x 500, hidden-layer delta, used in backpropagation
            dW1 = np.dot(self.X.T, d1) / 3                        # 784 x 500
            db1 = np.sum(d1, axis=0) / 3                          # 500
    
            self.W2 -= self.lr * dW2
            self.b2 -= self.lr * db2
            self.W1 -= self.lr * dW1
            self.b1 -= self.lr * db1
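
    To check that the implementation matches the formulas, the analytic gradient can be compared against a finite-difference estimate on a small random batch. The sketch below is our own addition (grad_check and its constants are not in the original post); since forward() sums the loss over the batch, the analytic gradient is taken before the division by the batch size:

    # Finite-difference check (sketch): perturb one weight and compare the
    # numerical gradient of the summed loss with the analytic one.
    def grad_check(eps=1e-5):
        np.random.seed(0)
        net = NN(784, 500, 10)
        X = np.random.rand(3, 784)
        y = onehot(np.random.randint(0, 10, 3), 3)
        net.forward(X, y)
        analytic = np.dot(net.a1.T, net.d2)[0, 0]     # dJ/dW2[0,0], without the /3
        net.W2[0, 0] += eps
        loss_plus, _ = net.forward(X, y)
        net.W2[0, 0] -= 2 * eps
        loss_minus, _ = net.forward(X, y)
        net.W2[0, 0] += eps                           # restore the weight
        numeric = (loss_plus - loss_minus) / (2 * eps)
        print("analytic:", analytic, "numeric:", numeric)  # should agree closely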
    
    

    4. Training and Testing

    We use the MNIST dataset bundled with torchvision directly. After training, the weight parameters are saved to a file; at test time they are read back from that file. Our final test accuracy reached 96.48%.

    def train():
        nn = NN(784, 500, 10)
    
        for epoch in range(10):
            for i in range(0, 60000, 3):
                X = train_data.data[i:i + 3]
                y = train_data.targets[i:i + 3]
                loss, _ = nn.forward(X, y)
                print("Epoch:", epoch, "-", i, ":", "{:.3f}".format(loss))
                nn.backward()
            np.savez("data.npz", w1=nn.W1, b1=nn.b1, w2=nn.W2, b2=nn.b2)
    
    def test():
        r = np.load("data.npz")
        nn = NN(784, 500, 10)
        nn.W1 = r["w1"]
        nn.b1 = r["b1"]
        nn.W2 = r["w2"]
        nn.b2 = r["b2"]
        _, result = nn.forward(test_data.data, test_data.targets2)
        result = np.argmax(result, axis=1)
        precision = np.sum(result == test_data.targets) / 10000
        print("Precision:", precision)
    
    # MNIST handwritten digit dataset
    train_data = torchvision.datasets.MNIST(root='./mnist/', train=True, download=False)
    test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
    train_data.data = train_data.data.numpy()         # [60000,28,28]
    train_data.targets = train_data.targets.numpy()   # [60000]
    test_data.data = test_data.data.numpy()           # [10000,28,28]
    test_data.targets = test_data.targets.numpy()     # [10000]
    
    # flatten and normalize the input vectors
    train_data.data = train_data.data.reshape(60000, 28 * 28) / 255.  # (60000, 784)
    test_data.data = test_data.data.reshape(10000, 28 * 28) / 255.
    
    # one-hot encode the labels
    train_data.targets = onehot(train_data.targets, 60000) # (60000, 10)
    test_data.targets2 = onehot(test_data.targets, 10000)  # used in the forward pass
    
    train()
    #test()
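
    Note: run train() first to produce data.npz, then comment it out and uncomment test() to evaluate the saved weights. If the MNIST files are not already present under ./mnist/, pass download=True to torchvision.datasets.MNIST on the first run.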
    

    5. Complete Code

    import torchvision
    import numpy as np
    
    # one-hot encode the labels
    def onehot(targets, num):
        result = np.zeros((num, 10))
        for i in range(num):
            result[i][targets[i]] = 1
        return result
    
    # sigmoid activation
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))
    
    # first derivative of sigmoid
    def Dsigmoid(x):
        return sigmoid(x)*(1-sigmoid(x))
    
    
    class NN(object):
        def __init__(self, l0, l1, l2):
            self.lr = 0.1                                        # learning rate
            self.W1 = np.random.randn(l0, l1) * 0.01             # small random initialization
            self.b1 = np.random.randn(l1) * 0.01
            self.W2 = np.random.randn(l1, l2) * 0.01
            self.b2 = np.random.randn(l2) * 0.01
    
        # forward pass
        def forward(self, X, y):
            self.X = X                                           # m x 784
            self.z1 = np.dot(X, self.W1) + self.b1               # m x 500; 500 is the hidden-layer width
            self.a1 = sigmoid(self.z1)
            self.z2 = np.dot(self.a1, self.W2) + self.b2         # m x 10
            self.a2 = sigmoid(self.z2)
            loss = np.sum((self.a2 - y) * (self.a2 - y)) / 2     # squared-error loss, summed over the batch
            self.d2 = (self.a2 - y) * Dsigmoid(self.z2)          # m x 10, output-layer delta, used in backpropagation
            return loss, self.a2
    
        # backward pass
        def backward(self):
            dW2 = np.dot(self.a1.T, self.d2) / 3                  # 500 x 10; average over batch size 3
            db2 = np.sum(self.d2, axis=0) / 3                     # 10
            d1 = np.dot(self.d2, self.W2.T) * Dsigmoid(self.z1)   # m x 500, hidden-layer delta, used in backpropagation
            dW1 = np.dot(self.X.T, d1) / 3                        # 784 x 500
            db1 = np.sum(d1, axis=0) / 3                          # 500
    
            self.W2 -= self.lr * dW2
            self.b2 -= self.lr * db2
            self.W1 -= self.lr * dW1
            self.b1 -= self.lr * db1
    
    
    def train():
        nn = NN(784, 500, 10)
    
        for epoch in range(10):
            for i in range(0, 60000, 3):
                X = train_data.data[i:i + 3]
                y = train_data.targets[i:i + 3]
                loss, _ = nn.forward(X, y)
                print("Epoch:", epoch, "-", i, ":", "{:.3f}".format(loss))
                nn.backward()
            np.savez("data.npz", w1=nn.W1, b1=nn.b1, w2=nn.W2, b2=nn.b2)
    
    def test():
        r = np.load("data.npz")
        nn = NN(784, 500, 10)
        nn.W1 = r["w1"]
        nn.b1 = r["b1"]
        nn.W2 = r["w2"]
        nn.b2 = r["b2"]
        _, result = nn.forward(test_data.data, test_data.targets2)
        result = np.argmax(result, axis=1)
        precision = np.sum(result == test_data.targets) / 10000
        print("Precision:", precision)
    
    if __name__ == '__main__':
    
        # MNIST handwritten digit dataset
        train_data = torchvision.datasets.MNIST(root='./mnist/', train=True, download=False)
        test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
        train_data.data = train_data.data.numpy()         # [60000,28,28]
        train_data.targets = train_data.targets.numpy()   # [60000]
        test_data.data = test_data.data.numpy()           # [10000,28,28]
        test_data.targets = test_data.targets.numpy()     # [10000]
    
        # flatten and normalize the input vectors
        train_data.data = train_data.data.reshape(60000, 28 * 28) / 255.  # (60000, 784)
        test_data.data = test_data.data.reshape(10000, 28 * 28) / 255.
    
        # one-hot encode the labels
        train_data.targets = onehot(train_data.targets, 60000) # (60000, 10)
        test_data.targets2 = onehot(test_data.targets, 10000)  # used in the forward pass
    
        train()
        #test()
    