Techniques used:
1. One-hot encoding:
Definition: represent the ten digits 1-10 in another form. The digit 1 becomes [1,0,0,0,0,0,0,0,0,0], and so on for the rest.
Why use one-hot encoding: the loss function is carried over from logistic regression, where y only takes the values 0 and 1, so the raw digit labels must be re-encoded here. (A minimal encoding sketch follows below.)
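For illustration, here is a minimal vectorized sketch of the encoding (standalone NumPy; the function name one_hot is hypothetical, and the version actually used in the exercise is the loop-based one_hot_encoder in the source code below):

import numpy as np

def one_hot(labels, num_classes=10):
    # digit k (1-10) maps to a row vector with a 1 at index k-1
    encoded = np.zeros((len(labels), num_classes))
    encoded[np.arange(len(labels)), np.asarray(labels) - 1] = 1
    return encoded

print(one_hot([1, 3, 10]))
# row 0: [1,0,0,0,0,0,0,0,0,0]   (label 1)
# row 1: [0,0,1,0,0,0,0,0,0,0]   (label 3)
# row 2: [0,0,0,0,0,0,0,0,0,1]   (label 10)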
How backpropagation works:
First, the goal of backpropagation: to find the optimal weights.
Workflow: run one forward pass and cache every intermediate value (input-layer activations, hidden-layer pre-activations, hidden-layer activations after the activation function, output-layer pre-activations, and output-layer activations after the activation function, i.e. the outputs). Note that the weights must not all be initialized to zero here (unlike logistic regression): every hidden unit would then compute the same value and learning would be meaningless, so draw random values from a small symmetric interval instead. Then run backpropagation: write out the gradient of the loss function via the chain rule, and add a regularization (penalty) term to guard against overfitting. Once backpropagation has produced the error terms and gradients, hand them to scipy's optimization module, which finds and returns the optimal parameters; the network is then trained. (A minimal gradient-check sketch for verifying the chain-rule gradients follows below.)
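A common way to sanity-check the chain-rule gradients, not part of the exercise code below, is to compare them against a central-difference numerical gradient. The helper numerical_gradient here is hypothetical; reg_cost, reg_gradient, X, y and lamda refer to the definitions in the source code below:

import numpy as np

def numerical_gradient(cost_fn, theta, eps=1e-4):
    # central-difference approximation of dJ/dtheta,
    # one component at a time (slow; use only on small parameter slices)
    grad = np.zeros_like(theta)
    for i in range(theta.size):
        step = np.zeros_like(theta)
        step[i] = eps
        grad[i] = (cost_fn(theta + step) - cost_fn(theta - step)) / (2 * eps)
    return grad

# usage sketch: the analytic gradient should agree closely with the numerical one
# num_grad = numerical_gradient(lambda t: reg_cost(t, X, y, lamda), theta_serialize)
# analytic = reg_gradient(theta_serialize, X, y, lamda)
# print(np.allclose(num_grad, analytic, atol=1e-6))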
Source code:
''' backpropagation for a neural network '''
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from scipy.optimize import minimize

'''
Training wrapper. With the unregularized gradient the model easily overfits;
with regularization, the fit on the training set drops somewhat.
'''
def nn_training(X, y):
    # random init in a small symmetric interval; all-zero init would be meaningless
    # 10285 = 25*401 + 10*26 (theta1 and theta2 flattened together)
    init_theta = np.random.uniform(-0.5, 0.5, 10285)
    res = minimize(
        fun=reg_cost,
        x0=init_theta,
        args=(X, y, lamda),
        method='TNC',
        jac=reg_gradient,
        options={'maxiter': 300}
    )
    return res

def one_hot_encoder(raw_y):
    result = []
    for i in raw_y:  # iterate over every label in raw_y
        # temporary array holding the encoded label
        y_temp = np.zeros(10)
        y_temp[i - 1] = 1
        result.append(y_temp)
    return np.array(result)

# serialization helpers: flatten both weight matrices into one vector and back
def serialize(a, b):
    return np.append(a.flatten(), b.flatten())

def deserialize(theta_serialize):
    theta1 = theta_serialize[:25 * 401].reshape(25, 401)
    theta2 = theta_serialize[25 * 401:].reshape(10, 26)
    return theta1, theta2

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

'''
Forward propagation. Every intermediate value is returned so backpropagation
can reuse it: a* are the post-sigmoid activations of each layer, z* are the
values after the theta products but before the activation function, h is the output.
'''
def feed_forward(theta_serialize, X):
    theta1, theta2 = deserialize(theta_serialize)
    a1 = X
    z2 = a1 @ theta1.T
    a2 = sigmoid(z2)
    a2 = np.insert(a2, 0, values=1, axis=1)
    z3 = a2 @ theta2.T
    h = sigmoid(z3)
    return a1, z2, a2, z3, h

'''
Cost function without regularization (training without it needs more iterations)
'''
def cost(theta_serialize, X, y):
    a1, z2, a2, z3, h = feed_forward(theta_serialize, X)
    J = -np.sum(y * np.log(h) + (1 - y) * np.log(1 - h)) / len(X)
    return J

'''
Cost function with regularization
'''
def reg_cost(theta_serialize, X, y, lamda):
    theta1, theta2 = deserialize(theta_serialize)
    sum1 = np.sum(np.power(theta1[:, 1:], 2))
    sum2 = np.sum(np.power(theta2[:, 1:], 2))
    reg = (sum1 + sum2) * lamda / (2 * len(X))
    return reg + cost(theta_serialize, X, y)

'''
Derivative of the sigmoid function
'''
def sigmoid_gradient(z):
    return sigmoid(z) * (1 - sigmoid(z))

'''
Unregularized gradient; d* are the per-layer error terms
'''
def gradient(theta_serialize, X, y):
    theta1, theta2 = deserialize(theta_serialize)
    a1, z2, a2, z3, h = feed_forward(theta_serialize, X)
    d3 = h - y
    d2 = d3 @ theta2[:, 1:] * sigmoid_gradient(z2)
    D2 = (d3.T @ a2) / len(X)
    D1 = (d2.T @ a1) / len(X)
    return serialize(D1, D2)

'''
Regularized gradient; d* are the per-layer error terms
'''
def reg_gradient(theta_serialize, X, y, lamda):
    D = gradient(theta_serialize, X, y)
    D1, D2 = deserialize(D)
    theta1, theta2 = deserialize(theta_serialize)
    # add the penalty term (the bias columns are not regularized)
    D1[:, 1:] += theta1[:, 1:] * lamda / len(X)
    D2[:, 1:] += theta2[:, 1:] * lamda / len(X)
    return serialize(D1, D2)

'''
Visualize the hidden layer. The images are not really human-interpretable:
the learned features only make sense to the model.
'''
def plot_hidden_layer(theta):
    theta1, _ = deserialize(theta)
    hidden_layer = theta1[:, 1:]  # (25, 400): the first column is the bias term, so drop it
    fig, ax = plt.subplots(ncols=5, nrows=5, figsize=(8, 8), sharey=True, sharex=True)
    for r in range(5):
        for c in range(5):
            ax[r, c].imshow(hidden_layer[5 * r + c].reshape(20, 20).T, cmap='gray_r')
    # hide the x/y axis ticks
    plt.xticks([])
    plt.yticks([])
    plt.show()

data = sio.loadmat('./data_set/ex4data1.mat')
raw_X = data['X']
raw_y = data['y']
X = np.insert(raw_X, 0, values=1, axis=1)
# X.shape
y = one_hot_encoder(raw_y)

theta = sio.loadmat('./data_set/ex4weights.mat')
theta1, theta2 = theta['Theta1'], theta['Theta2']
# print(theta1)
# print(theta2)
theta_serialize = serialize(theta1, theta2)

lamda = 10
# print(reg_cost(theta_serialize, X, y, lamda))

res = nn_training(X, y)
# print(res.x)

raw_y = raw_y.reshape(5000,)
_, _, _, _, h = feed_forward(res.x, X)
y_pred = np.argmax(h, axis=1) + 1
acc = np.mean(y_pred == raw_y)
print(acc)  # training-set accuracy

plot_hidden_layer(res.x)
That's all.
I hope this helps.