  • Neural network BP algorithm: C and Python code

    This post shows only the code.

    For the details of backpropagation and the relevant neural-network background, see the earlier post: Neural Networks and the Backpropagation Algorithm Derivation.

    First, the forward-propagation computation:

    Input:
    First, four positive integers n, m, p, t: the number of features, training samples,
    hidden-layer neurons, and output-layer neurons, respectively,

    where 1 < n <= 100, 1 < m <= 1000, 1 < p <= 100, 1 < t <= 10.


    Then follow m lines of n+1 numbers each: the n feature values (x_1, x_2, ..., x_n) of one
    sample, followed by its observed result y. The feature values are real numbers; the observed result is a positive integer between 1 and t.
    Finally come two initial weight matrices:
    the first is the input-to-hidden weight matrix, of size p*(n+1);
    the second is the hidden-to-output weight matrix, of size t*(p+1).
    Output:
    Three parts:
    The first line is one floating-point number: the cost J computed by the network with the initial weight matrices.
    Then come m lines of p floating-point numbers each: the hidden-layer outputs (bias unit excluded).


    Finally come m lines of t floating-point numbers each: the output-layer outputs (bias unit excluded).
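
    For reference, the forward pass and the cost being computed can be summarized in LaTeX as follows (consistent with the code below; $\sigma$ is the sigmoid):

    $$a^{(2)} = \sigma\left(W_1 \begin{bmatrix} 1 \\ x \end{bmatrix}\right), \qquad
    a^{(3)} = \sigma\left(W_2 \begin{bmatrix} 1 \\ a^{(2)} \end{bmatrix}\right), \qquad
    \sigma(z) = \frac{1}{1 + e^{-z}}$$

    $$J = \frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{t} \left[ -y^{(i)}_k \log a^{(3)(i)}_k - \left(1 - y^{(i)}_k\right) \log\left(1 - a^{(3)(i)}_k\right) \right]$$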
    Sample Input1:
    3 3 5 3
    0.084147 0.090930 0.014112 3
    0.090930 0.065699 -0.053657 2
    2 3 4 1
    0.084147 -0.027942 -0.099999 -0.028790
    0.090930 0.065699 -0.053657 -0.096140
    0.014112 0.098936 0.042017 -0.075099
    -0.075680 0.041212 0.099061 0.014988
    -0.095892 -0.054402 0.065029 0.091295
    0.084147 -0.075680 0.065699 -0.054402 0.042017 -0.028790
    0.090930 -0.095892 0.098936 -0.099999 0.099061 -0.096140
    0.014112 -0.027942 0.041212 -0.053657 0.065029 -0.075099
    Sample Output1:
    2.094661
    0.518066 0.522540 0.506299 0.484257 0.476700
    0.519136 0.524614 0.507474 0.483449 0.474655
    0.404465 0.419895 0.509409 0.589979 0.587968
    0.514583 0.511113 0.497424
    0.514587 0.511139 0.497447
    0.515313 0.511164 0.496748

    A note is needed here: this computes only a network with a single hidden layer, and the original label values 3, 2, 1 are class indices: for the first sample the third output unit should be 1, for the second sample the second output unit should be 1, and so on.
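
    As a concrete illustration (a minimal numpy sketch, not part of the submitted solution), the labels 3, 2, 1 map to one-hot rows like this:

    from numpy import eye, mat

    labels = [3, 2, 1]   # the y column of the three samples
    t = 3                # number of output units
    # label k -> a row with a 1 in (1-indexed) position k
    Y = mat(eye(t))[[k - 1 for k in labels]]
    print(Y)
    # [[0. 0. 1.]
    #  [0. 1. 0.]
    #  [1. 0. 0.]]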

    The Python code is as follows:

    # coding=utf-8
    import math
    from numpy import mat, ones, exp, row_stack, dot

    f = open(r'test')
    lines = []
    # Preprocessing: split each line of the file into tokens.
    for each in f:
        lines.append(each.strip().split())
    n, m, p, t = lines[0]
    sample = lines[1:int(m) + 1]                         # the m sample rows
    w_in_hidden = lines[int(m) + 1:int(m) + 1 + int(p)]  # p rows of weights
    w_hidden_out = lines[int(m) + 1 + int(p):]           # t rows of weights
    feature = []  # feature matrix
    lable = []    # labels
    for each in sample:
        feature.append(each[:-1])
        lable.append(each[-1])
    # Convert the lists into matrices.
    feature = mat(feature)
    lable = mat(lable)
    w_in_hidden = mat(w_in_hidden)    # input-to-hidden weight matrix
    w_hidden_out = mat(w_hidden_out)  # hidden-to-output weight matrix
    # Transpose so each column is one sample, then prepend the bias row
    # (a matrix-concatenation step).
    feature = feature.T
    bias = mat(ones(feature.shape[1]))
    feature = row_stack((bias, feature))
    # The file data was read as str; convert everything to float.
    feature = feature.astype(dtype=float)
    w_in_hidden = w_in_hidden.astype(dtype=float)
    lable = lable.astype(dtype=float)
    w_hidden_out = w_hidden_out.astype(dtype=float)
    hidden_output = dot(w_in_hidden, feature)
    hidden_output = hidden_output.T
    # exp is numpy's elementwise exponential, so this applies the sigmoid
    # to the whole matrix at once.
    hidden_output = 1 / (1 + exp(-1 * hidden_output))
    print(hidden_output)  # hidden-layer output
    hidden_output = hidden_output.T
    bias = mat(ones(hidden_output.shape[1]))
    hidden_output = row_stack((bias, hidden_output))
    output = dot(w_hidden_out, hidden_output)
    output = output.T
    output = 1 / (1 + exp(-1 * output))
    print(output)  # output-layer output
    # The original labels 3, 2, 1 mean: for sample 1 the third output unit
    # should be 1, for sample 2 the second output unit should be 1, ...
    lable = mat([[0, 0, 1], [0, 1, 0], [1, 0, 0]])
    output = output.tolist()  # convert the matrices back to lists
    lable = lable.tolist()
    cost = 0.0
    # Accumulate the cross-entropy cost term by term. (This can also be
    # done directly with matrix operations; see the note below.)
    for i in range(len(output)):
        for j in range(len(output[0])):
            cost += -lable[i][j] * math.log(output[i][j]) \
                    - (1 - lable[i][j]) * math.log(1 - output[i][j])
    print(cost / int(m))
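
    A note on the comment near the end: the diagonal-sum function being looked for exists in numpy as trace, but the cost can also be computed with no loop at all via elementwise operations. A sketch, assuming lable and output are still the m-by-t matrices from just before the tolist conversion:

    from numpy import multiply, log

    # elementwise cross-entropy, summed over every sample and class
    cost = (multiply(-lable, log(output))
            - multiply(1 - lable, log(1 - output))).sum() / int(m)
    print(cost)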
    

    The order of the printed values does not exactly match the expected output above; please ignore this minor issue.

    The output appeared in the original post as a screenshot.

    The C code is as follows:

    #include <stdio.h>
    #include <math.h>
    
    #define MAX_SAMPLE_NUMBER 1024
    #define MAX_FEATURE_DIMENSION 128
    #define MAX_LABEL_NUMBER 12
    
    double sigmoid(double z){
    	return 1 / (1 + exp(-z));
    }
    
    double hypothesis(double x[], double theta[], int feature_number){
    //hypothesis computes the output of a single neuron
    	double h = 0;
    	for (int i = 0; i <= feature_number; i++){
    		h += x[i] * theta[i];
    	}
    	return sigmoid(h);
    }
    
    void forward_propagation(double a[],
    						 int feature_number,
    						 double W[][MAX_FEATURE_DIMENSION],
    						 int neuron_num,
    						 double output[]){
    
    	for (int i = 0; i < neuron_num; i++){
    		output[i+1] = hypothesis(a, W[i], feature_number);
    		//W[i] holds the previous layer's weights feeding output neuron i
    	}
    }
    
    double compute_cost(double X[][MAX_FEATURE_DIMENSION], 
    					int y[],
    					int feature_number,
    					int sample_number,
    					double W1[][MAX_FEATURE_DIMENSION],
    					int hidden_layer_size,
    					double W2[][MAX_FEATURE_DIMENSION],
    					int label_num,
    					double a2[][MAX_FEATURE_DIMENSION],
    					double a3[][MAX_FEATURE_DIMENSION]){
    					//a2 holds the hidden-layer outputs, a3 the output-layer outputs; W1 and W2 are the corresponding weight matrices
    	double sum = 0;
    	for (int i = 0; i < sample_number; i++){
    		X[i][0] = 1;
    		forward_propagation(X[i], feature_number, W1, hidden_layer_size, a2[i]);
    		a2[i][0] = 1;
    		forward_propagation(a2[i], hidden_layer_size, W2, label_num, a3[i]);
    		double yy[MAX_LABEL_NUMBER] = {0};
    		yy[y[i]] = 1;
    		for (int j = 1; j <= label_num; j++){
    			sum += -yy[j] * log(a3[i][j]) - (1 - yy[j]) * log(1 - a3[i][j]);
    		}
    	}
    	return sum / sample_number;
    }
    
    double X[MAX_SAMPLE_NUMBER][MAX_FEATURE_DIMENSION];
    int y[MAX_SAMPLE_NUMBER];
    double W1[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION];
    double W2[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION];
    double a2[MAX_SAMPLE_NUMBER][MAX_FEATURE_DIMENSION];
    double a3[MAX_SAMPLE_NUMBER][MAX_FEATURE_DIMENSION];
    
    int main(){
    	int feature_number;
    	int sample_number;
    	int hidden_layer_size;
    	int label_num;
    	scanf("%d %d %d %d", &feature_number, &sample_number, &hidden_layer_size, &label_num);
    	for (int i = 0; i < sample_number; i++){
    		for (int j = 1; j <= feature_number; j++){
    			scanf("%lf", &X[i][j]);
    		}
    		scanf("%d", &y[i]);
    	}
    	for (int i = 0; i < hidden_layer_size; i++){
    		for (int j = 0; j <= feature_number; j++){
    			scanf("%lf", &W1[i][j]);
    		}
    	}
    	for (int i = 0; i < label_num; i++){
    		for (int j = 0; j <= hidden_layer_size; j++){
    			scanf("%lf", &W2[i][j]);
    		}
    	}
    	double J = compute_cost(X, y, feature_number, sample_number,
    		W1, hidden_layer_size, W2, label_num, a2, a3);
    	printf("%lf
    ", J);
    	for (int i = 0; i < sample_number; i++){
    		for (int j = 1; j < hidden_layer_size; j++){
    			printf("%lf ", a2[i][j]);
    		}
    		printf("%lf
    ", a2[i][hidden_layer_size]);
    	}
    	for (int i = 0; i < sample_number; i++){
    		for (int j = 1; j < label_num; j++){
    			printf("%lf ", a3[i][j]);
    		}
    		printf("%lf
    ", a3[i][label_num]);
    	}
    	return 0;
    }

    To reproduce the results, compile with the math library linked (e.g. cc nn.c -lm) and feed the sample input on stdin; the output appeared in the original post as a screenshot.

    As for the backpropagation algorithm itself, I did not find a suitable test example, so only the C++ code and self-made test data are given here; there is no verification set.
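
    For reference, the gradients computed below are (in LaTeX; $\odot$ is the elementwise product, and the bias entry is dropped from $\delta^{(2)}$):

    $$\delta^{(3)} = a^{(3)} - y, \qquad
    \delta^{(2)} = \left(W_2^{\top} \delta^{(3)}\right) \odot \sigma'\left(z^{(2)}\right), \qquad
    \sigma'(z) = \sigma(z)\left(1 - \sigma(z)\right)$$

    $$\frac{\partial J}{\partial W_2} = \frac{1}{m} \sum_{i=1}^{m} \delta^{(3)} \begin{bmatrix} 1 \\ a^{(2)} \end{bmatrix}^{\top}, \qquad
    \frac{\partial J}{\partial W_1} = \frac{1}{m} \sum_{i=1}^{m} \delta^{(2)} \begin{bmatrix} 1 \\ x \end{bmatrix}^{\top}$$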

    The C++ code:

    #include <stdio.h>
    #include <math.h>
    
    double sigmoid(double z){
    	return 1 / (1 + exp(-z));
    }
    
    double hypothesis(double x[], double theta[], int feature_number){
    	double h = 0;
    	for (int i = 0; i <= feature_number; i++){
    		h += x[i] * theta[i];
    	}
    	return h;
    }
    
    #define MAX_FEATURE_DIMENSION 128
    #define MAX_LABEL_NUMBER 12
    
    void forward_propagation(double input[],
    						 int feature_number,
    						 double W[][MAX_FEATURE_DIMENSION],
    						 int neuron_num,
    						 double z[],
    						 double a[]){
    
    	for (int i = 0; i < neuron_num; i++){
    		z[i+1] = hypothesis(input, W[i], feature_number);
    		a[i+1] = sigmoid(z[i+1]);
    		//the +1 offset reserves index 0 for the bias unit
    	}
    }
    
    double sigmoid_gradient(double z){
    	return sigmoid(z) * (1 - sigmoid(z));
    	//the derivative of the sigmoid can be written in this form; note that z, not sigmoid(z), is the variable
    }
    
    void compute_layer_error(double layer_error[],
    						double W[][MAX_FEATURE_DIMENSION],
    						int neuron_num,
    						int feature_number,
    						double next_layer_error[],
    						double z[]){
    //this computes the layer error term theta(l); see the previous post for the derivation
    	for (int i = 1; i <= feature_number; i++){
    		for (int j = 0; j < neuron_num; j++){
    			layer_error[i] += W[j][i] * next_layer_error[j + 1];	//next_layer_error[j + 1] = theta(l+1)
    		}
    	}
    	for (int i = 1; i <= feature_number; i++){
    		layer_error[i] = layer_error[i] * sigmoid_gradient(z[i]);
    	}
    }
    void accumulate_gradient(double sum[][MAX_FEATURE_DIMENSION], 
    						 double layer_error[],
    						 int neuron_num,
    						 int feature_number,
    						 double a[]){
    						 //accumulate each sample's gradient contribution (layer error times activation)
    	for (int i = 0; i < neuron_num; i++){
    		for (int j = 0; j <= feature_number; j++){
    			sum[i][j] += layer_error[i+1] * a[j];
    		}
    	}
    }
    
    void compute_gradient(double X[][MAX_FEATURE_DIMENSION], 
    						int y[],
    						int feature_number,
    						int sample_number,
    						double W1[][MAX_FEATURE_DIMENSION],
    						int hidden_layer_size,
    						double W2[][MAX_FEATURE_DIMENSION],
    						int label_num,
    						double w1_grad[][MAX_FEATURE_DIMENSION],
    						double w2_grad[][MAX_FEATURE_DIMENSION]){
    
    	double grad1_sum[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION] = {0};
    	double grad2_sum[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION] = {0};
    	for (int i = 0; i < sample_number; i++){
    		X[i][0] = 1;
    		double z2[MAX_FEATURE_DIMENSION] = {0, 0};
    		double a2[MAX_FEATURE_DIMENSION] = {1, 0};	// a2[0] = 1 is the bias unit
    		forward_propagation(X[i], feature_number, W1, hidden_layer_size, z2, a2);
    		double z3[MAX_FEATURE_DIMENSION] = {0};
    		double a3[MAX_FEATURE_DIMENSION] = {0};		
    		forward_propagation(a2, hidden_layer_size, W2, label_num, z3, a3);
    		double yy[MAX_LABEL_NUMBER] = {0};
    		yy[y[i]] = 1;
    		
    		double layer3_error[MAX_FEATURE_DIMENSION] = {0};
    		for (int j = 1; j <= label_num; j++){
    			layer3_error[j] = a3[j] - yy[j];
    		}		
    		double layer2_error[MAX_FEATURE_DIMENSION] = {0};
    		compute_layer_error(layer2_error, W2, label_num, hidden_layer_size, layer3_error, z2);
    		accumulate_gradient(grad2_sum, layer3_error, label_num, hidden_layer_size, a2);
    		accumulate_gradient(grad1_sum, layer2_error, hidden_layer_size, feature_number, X[i]);
    	}
    	for (int i = 0; i < hidden_layer_size; i++){
    		for (int j = 0; j <= feature_number; j++){
    			w1_grad[i][j] = grad1_sum[i][j] / sample_number;
    		}
    	}
    	for (int i = 0; i < label_num; i++){
    		for (int j = 0; j <= hidden_layer_size; j++){
    			w2_grad[i][j] = grad2_sum[i][j] / sample_number;
    		}
    	}
    }
    
    int main(){
    	double X[][MAX_FEATURE_DIMENSION] = {
    		{0, 0.084147, 0.090930},
    		{0, 0.090930, 0.065699},
    		{0, 2, 3}
    	};
    	int y[] = {1, 2, 2};
    	int hidden_layer_size = 4;
    	int label_num = 2;
    	int feature_number = 2;
    	int sample_number = 3;
    	double W1[][MAX_FEATURE_DIMENSION] = {
    		{0.084147, -0.027942, -0.099999},
    		{0.090930, 0.065699, -0.053657},
    		{0.014112, 0.098936, 0.042017},
    		{-0.075680, 0.041212, 0.099061},
    	};
    	double W2[][MAX_FEATURE_DIMENSION] = {
    		{0.084147, -0.075680, 0.065699, -0.054402, 0.042017},
    		{0.090930, -0.095892, 0.098936, -0.099999, 0.099061}
    	};
    	double a2[10][MAX_FEATURE_DIMENSION] = {0};
    	double a3[10][MAX_FEATURE_DIMENSION] = {0};
    
    	double w1_grad[10][MAX_FEATURE_DIMENSION] = {0};
    	double w2_grad[10][MAX_FEATURE_DIMENSION] = {0};
    
    	compute_gradient(X, y, feature_number, 3, W1,
    					hidden_layer_size, W2, label_num, w1_grad, w2_grad);
    
    	printf("w1_grad:
    ");
    	for (int i = 0; i < hidden_layer_size; i++){
    		for (int j = 0; j <= feature_number; j++){
    			printf("%lf ", w1_grad[i][j]);
    		}
    		printf("
    ");
    	}
    
    	printf("w2_grad:
    ");
    	for (int i = 0; i < label_num; i++){
    		for (int j = 0; j <= hidden_layer_size; j++){
    			printf("%lf ", w2_grad[i][j]);
    		}
    		printf("
    ");
    	}
    	
    	return 0;
    }

    The run's output appeared in the original post as a screenshot.
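
    Lacking a reference answer, a standard way to self-check backpropagation is numerical gradient checking: perturb each weight by a small epsilon and compare the finite-difference slope with the analytic gradient. A minimal Python sketch under that assumption; cost is a hypothetical callable that runs the forward pass over the same data and returns J, and W is a numpy array of weights:

    import numpy as np

    def numerical_gradient(cost, W, eps=1e-4):
        # Central-difference estimate of dJ/dW, one entry at a time.
        grad = np.zeros_like(W)
        it = np.nditer(W, flags=['multi_index'])
        while not it.finished:
            idx = it.multi_index
            old = W[idx]
            W[idx] = old + eps
            j_plus = cost(W)
            W[idx] = old - eps
            j_minus = cost(W)
            W[idx] = old  # restore the original weight
            grad[idx] = (j_plus - j_minus) / (2 * eps)
            it.iternext()
        return grad

    # The BP gradients are trustworthy when they agree with this estimate
    # to within roughly 1e-7 relative error.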


    Copyright notice: this is the blog author's original article; please do not reproduce without permission.
