  • Neural network back-propagation (BP) algorithm in C and Python

    Only the code is shown here.

    For details of the BP principle and the relevant neural-network background, see: Neural Networks and the Derivation of the Back-Propagation Algorithm.

    First, the forward-propagation computation:

    Input:
    First, four positive integers n, m, p, t: the number of features, the number of
    training samples, the number of hidden-layer neurons, and the number of
    output-layer neurons, respectively.

    Here 1 < n <= 100, 1 < m <= 1000, 1 < p <= 100, 1 < t <= 10.

    Then follow m lines, each containing n+1 numbers: the n feature values
    (x1, x2, ..., xn) of one sample and the sample's observed result y. The feature
    values are real numbers; the observed result is an integer from 1 to t.
    Finally, two groups of initial weight values are given.
    The first group is the input-to-hidden weight matrix, of size p*(n+1).
    The second group is the hidden-to-output weight matrix, of size t*(p+1).
    Output:
    Three parts:
    The first line is a single floating-point number: the cost J computed by the
    network with the initial weight matrices.
    Then m lines, each with p floating-point numbers: the hidden-layer outputs
    (excluding the bias unit).
    Finally m lines, each with t floating-point numbers: the output-layer outputs
    (excluding the bias unit).
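    Written out, the forward pass and the cost J described above follow the standard formulas below (a sketch in my own notation, not taken verbatim from the problem statement); sigma is the sigmoid, [1; x] denotes prepending the bias term, and y^{(i)} is the one-hot encoding of the i-th label:

    a^{(2)} = \sigma\!\left(W^{(1)}\,[1;\,x]\right), \qquad
    a^{(3)} = \sigma\!\left(W^{(2)}\,[1;\,a^{(2)}]\right), \qquad
    \sigma(z) = \frac{1}{1 + e^{-z}}

    J = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{t}
        \left[ -y_k^{(i)} \log a_k^{(3)(i)} - \bigl(1 - y_k^{(i)}\bigr) \log\bigl(1 - a_k^{(3)(i)}\bigr) \right]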
    Sample Input1:
    3 3 5 3
    0.084147 0.090930 0.014112 3
    0.090930 0.065699 -0.053657 2
    2 3 4 1
    0.084147 -0.027942 -0.099999 -0.028790
    0.090930 0.065699 -0.053657 -0.096140
    0.014112 0.098936 0.042017 -0.075099
    -0.075680 0.041212 0.099061 0.014988
    -0.095892 -0.054402 0.065029 0.091295
    0.084147 -0.075680 0.065699 -0.054402 0.042017 -0.028790
    0.090930 -0.095892 0.098936 -0.099999 0.099061 -0.096140
    0.014112 -0.027942 0.041212 -0.053657 0.065029 -0.075099
    Sample Output1:
    2.094661
    0.518066 0.522540 0.506299 0.484257 0.476700
    0.519136 0.524614 0.507474 0.483449 0.474655
    0.404465 0.419895 0.509409 0.589979 0.587968
    0.514583 0.511113 0.497424
    0.514587 0.511139 0.497447
    0.515313 0.511164 0.496748

    One thing to add here: only a network with a single hidden layer is computed, and the original label values 3, 2, 1 mean that for the first sample the third output unit should be 1, for the second sample the second output unit should be 1, and so on (i.e., a one-hot encoding).
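    A minimal sketch of that one-hot conversion (the variable names here are illustrative and not taken from the code below):

    # Turn labels such as [3, 2, 1] into one-hot rows: label k -> unit k is 1.
    labels = [3, 2, 1]
    t = 3  # number of output units
    one_hot = [[1 if k + 1 == y else 0 for k in range(t)] for y in labels]
    # one_hot == [[0, 0, 1], [0, 1, 0], [1, 0, 0]]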

    The Python code is as follows:

    #coding=utf-8
    import math
    from numpy import *
    f = open(r'test')
    input = []
    # Preprocessing: read the file into a list of token lists
    for each in f:
        input.append(each.strip().split())
    n, m, p, t = input[0]
    sample = input[1:int(m) + 1]
    w_in_hidden = input[int(m) + 1:int(m) + 1 + int(p)]
    w_hidden_out = input[int(m) + 1 + int(p):]
    feature = []  # feature matrix
    lable = []    # labels
    for each in sample:
        feature.append(each[:-1])
        lable.append(each[-1])
    # Convert the lists into matrices
    feature = mat(feature)
    lable = mat(lable)
    w_in_hidden = mat(w_in_hidden)    # input-to-hidden weight matrix, p x (n+1)
    w_hidden_out = mat(w_hidden_out)  # hidden-to-output weight matrix, t x (p+1)
    # Transpose so that each column is one sample
    feature = feature.T
    bias = mat(ones(feature.shape[1]))
    feature = row_stack((bias, feature))
    # Prepend a row of ones as the bias inputs (matrix concatenation)
    feature = feature.astype(dtype=float)
    # The values were read as strings, so convert the matrices to float
    w_in_hidden = w_in_hidden.astype(dtype=float)
    lable = lable.astype(dtype=float)
    w_hidden_out = w_hidden_out.astype(dtype=float)
    hidden_output = dot(w_in_hidden, feature)
    hidden_output = hidden_output.T
    # exp here is numpy's element-wise exponential
    hidden_output = 1 / (1 + exp(-1 * hidden_output))
    print(hidden_output)  # hidden-layer output
    hidden_output = hidden_output.T
    bias = mat(ones(hidden_output.shape[1]))
    hidden_output = row_stack((bias, hidden_output))
    output = dot(w_hidden_out, hidden_output)
    output = output.T
    output = 1 / (1 + exp(-1 * output))
    print(output)  # output-layer output
    # The original labels 3, 2, 1 mean: for the first sample the third output
    # unit should be 1, for the second sample the second output unit, and so on
    lable = mat([[0, 0, 1], [0, 1, 0], [1, 0, 0]])
    output = output.tolist()  # convert the matrices back to lists
    lable = lable.tolist()
    sum = 0.0
    # Compute the cost. This could also be done directly with matrix operations;
    # I have not yet found the function for the sum of a matrix's diagonal, so
    # I note it here to fill in later.
    for i in range(len(output)):
        for j in range(len(output[0])):
            sum += math.log(output[i][j]) * -lable[i][j] - math.log(1 - output[i][j]) * (1 - lable[i][j])
    print(sum / int(m))
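    As noted in the comment above, the cost can also be computed with matrix operations instead of the explicit double loop. A minimal sketch, assuming output and lable still hold the m x t outputs and one-hot labels (numpy does provide trace for the sum of a matrix's diagonal, but an element-wise multiply-and-sum avoids forming the intermediate product):

    # Vectorized cost: element-wise multiply and sum, no explicit Python loops.
    # Sketch only; assumes output and lable are (convertible to) m x t arrays.
    Y = array(lable, dtype=float)
    A = array(output, dtype=float)
    J = (multiply(-Y, log(A)) - multiply(1 - Y, log(1 - A))).sum() / int(m)
    print(J)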
    

    Note that the script prints the results in a different order than required above (the cost is printed last); please ignore this minor issue.

    The output (shown as a screenshot in the original post) is omitted here.

    The C code is as follows:

    #include <stdio.h>
    #include <math.h>
    
    #define MAX_SAMPLE_NUMBER 1024
    #define MAX_FEATURE_DIMENSION 128
    #define MAX_LABEL_NUMBER 12
    
    double sigmoid(double z){
    	return 1 / (1 + exp(-z));
    }
    
    double hypothesis(double x[], double theta[], int feature_number){
    // hypothesis here computes the activation of a single neuron
    	double h = 0;
    	for (int i = 0; i <= feature_number; i++){
    		h += x[i] * theta[i];
    	}
    	return sigmoid(h);
    }
    
    void forward_propagation(double a[],
    						 int feature_number,
    						 double W[][MAX_FEATURE_DIMENSION],
    						 int neuron_num,
    						 double output[]){
    
    	for (int i = 0; i < neuron_num; i++){
    		output[i+1] = hypothesis(a, W[i], feature_number);
    		// W[i] holds the weights from the previous layer into the i-th neuron of this layer
    	}
    }
    
    double compute_cost(double X[][MAX_FEATURE_DIMENSION], 
    					int y[],
    					int feature_number,
    					int sample_number,
    					double W1[][MAX_FEATURE_DIMENSION],
    					int hidden_layer_size,
    					double W2[][MAX_FEATURE_DIMENSION],
    					int label_num,
    					double a2[][MAX_FEATURE_DIMENSION],
    					double a3[][MAX_FEATURE_DIMENSION]){
    					// a2 is the hidden-layer output, a3 is the output-layer output; W1 and W2 are the corresponding weight matrices
    	double sum = 0;
    	for (int i = 0; i < sample_number; i++){
    		X[i][0] = 1;
    		forward_propagation(X[i], feature_number, W1, hidden_layer_size, a2[i]);
    		a2[i][0] = 1;
    		forward_propagation(a2[i], hidden_layer_size, W2, label_num, a3[i]);
    		double yy[MAX_LABEL_NUMBER] = {0};
    		yy[y[i]] = 1;
    		for (int j = 1; j <= label_num; j++){
    			sum += -yy[j] * log(a3[i][j]) - (1 - yy[j]) * log(1 - a3[i][j]);
    		}
    	}
    	return sum / sample_number;
    }
    
    double X[MAX_SAMPLE_NUMBER][MAX_FEATURE_DIMENSION];
    int y[MAX_SAMPLE_NUMBER];
    double W1[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION];
    double W2[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION];
    double a2[MAX_SAMPLE_NUMBER][MAX_FEATURE_DIMENSION];
    double a3[MAX_SAMPLE_NUMBER][MAX_FEATURE_DIMENSION];
    
    int main(){
    	int feature_number;
    	int sample_number;
    	int hidden_layer_size;
    	int label_num;
    	scanf("%d %d %d %d", &feature_number, &sample_number, &hidden_layer_size, &label_num);
    	for (int i = 0; i < sample_number; i++){
    		for (int j = 1; j <= feature_number; j++){
    			scanf("%lf", &X[i][j]);
    		}
    		scanf("%d", &y[i]);
    	}
    	for (int i = 0; i < hidden_layer_size; i++){
    		for (int j = 0; j <= feature_number; j++){
    			scanf("%lf", &W1[i][j]);
    		}
    	}
    	for (int i = 0; i < label_num; i++){
    		for (int j = 0; j <= hidden_layer_size; j++){
    			scanf("%lf", &W2[i][j]);
    		}
    	}
    	double J = compute_cost(X, y, feature_number, sample_number,
    		W1, hidden_layer_size, W2, label_num, a2, a3);
    	printf("%lf
    ", J);
    	for (int i = 0; i < sample_number; i++){
    		for (int j = 1; j < hidden_layer_size; j++){
    			printf("%lf ", a2[i][j]);
    		}
    		printf("%lf
    ", a2[i][hidden_layer_size]);
    	}
    	for (int i = 0; i < sample_number; i++){
    		for (int j = 1; j < label_num; j++){
    			printf("%lf ", a3[i][j]);
    		}
    		printf("%lf
    ", a3[i][label_num]);
    	}
    	return 0;
    }

    The result (shown as a screenshot in the original post) is omitted here.

    As for the back-propagation step, I did not find a suitable test case, so only the C++ code and some self-made test data are given here, with no validation set.
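    These are the standard formulas that compute_layer_error and accumulate_gradient below implement (a sketch in my own notation; in the code the bias terms are carried in index 0 of each array):

    \delta^{(3)} = a^{(3)} - y, \qquad
    \delta^{(2)} = \bigl((W^{(2)})^{T}\,\delta^{(3)}\bigr) \odot \sigma'(z^{(2)}), \qquad
    \sigma'(z) = \sigma(z)\,\bigl(1 - \sigma(z)\bigr)

    \frac{\partial J}{\partial W^{(2)}} = \frac{1}{m}\sum_{i=1}^{m} \delta^{(3)(i)} \bigl(a^{(2)(i)}\bigr)^{T}, \qquad
    \frac{\partial J}{\partial W^{(1)}} = \frac{1}{m}\sum_{i=1}^{m} \delta^{(2)(i)} \bigl(x^{(i)}\bigr)^{T}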

    The C++ code:

    #include <stdio.h>
    #include <math.h>
    
    double sigmoid(double z){
    	return 1 / (1 + exp(-z));
    }
    
    double hypothesis(double x[], double theta[], int feature_number){
    	double h = 0;
    	for (int i = 0; i <= feature_number; i++){
    		h += x[i] * theta[i];
    	}
    	return h;
    }
    
    #define MAX_FEATURE_DIMENSION 128
    #define MAX_LABEL_NUMBER 12
    
    void forward_propagation(double input[],
    						 int feature_number,
    						 double W[][MAX_FEATURE_DIMENSION],
    						 int neuron_num,
    						 double z[],
    						 double a[]){
    
    	for (int i = 0; i < neuron_num; i++){
    		z[i+1] = hypothesis(input, W[i], feature_number);
    		a[i+1] = sigmoid(z[i+1]);
    		// indices start at 1 because slot 0 is reserved for the bias unit
    	}
    }
    
    double sigmoid_gradient(double z){
    	return sigmoid(z) * (1 - sigmoid(z));
    	// the derivative of the sigmoid can be written in this form; note that z is the variable of differentiation
    }
    
    void compute_layer_error(double layer_error[],
    						double W[][MAX_FEATURE_DIMENSION],
    						int neuron_num,
    						int feature_number,
    						double next_layer_error[],
    						double z[]){
    // this computes the layer error delta(l); see the previous post for the derivation
    	for (int i = 1; i <= feature_number; i++){
    		for (int j = 0; j < neuron_num; j++){
    			layer_error[i] += W[j][i] * next_layer_error[j + 1];  // next_layer_error[j + 1] = delta(l+1)
    		}
    	}
    	for (int i = 1; i <=feature_number; i++){
    		layer_error[i] = layer_error[i] * sigmoid_gradient(z[i]);
    	}
    }
    void accumulate_gradient(double sum[][MAX_FEATURE_DIMENSION], 
    						 double layer_error[],
    						 int neuron_num,
    						 int feature_number,
    						 double a[]){
    						 // accumulate the gradient contribution of each sample
    	for (int i = 0; i < neuron_num; i++){
    		for (int j = 0; j <= feature_number; j++){
    			sum[i][j] += layer_error[i+1] * a[j];
    		}
    	}
    }
    
    void compute_gradient(double X[][MAX_FEATURE_DIMENSION], 
    						int y[],
    						int feature_number,
    						int sample_number,
    						double W1[][MAX_FEATURE_DIMENSION],
    						int hidden_layer_size,
    						double W2[][MAX_FEATURE_DIMENSION],
    						int label_num,
    						double w1_grad[][MAX_FEATURE_DIMENSION],
    						double w2_grad[][MAX_FEATURE_DIMENSION]){
    
    	double grad1_sum[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION] = {0};
    	double grad2_sum[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION] = {0};
    	for (int i = 0; i < sample_number; i++){
    		X[i][0] = 1;
    		double z2[MAX_FEATURE_DIMENSION] = {0, 0};
    		double a2[MAX_FEATURE_DIMENSION] = {1, 0};		
    		forward_propagation(X[i], feature_number, W1, hidden_layer_size, z2, a2);
    		double z3[MAX_FEATURE_DIMENSION] = {0};
    		double a3[MAX_FEATURE_DIMENSION] = {0};		
    		forward_propagation(a2, hidden_layer_size, W2, label_num, z3, a3);
    		double yy[MAX_LABEL_NUMBER] = {0};
    		yy[y[i]] = 1;
    		
    		double layer3_error[MAX_FEATURE_DIMENSION] = {0};
    		for (int j = 1; j <= label_num; j++){
    			layer3_error[j] = a3[j] - yy[j];
    		}		
    		double layer2_error[MAX_FEATURE_DIMENSION] = {0};
    		compute_layer_error(layer2_error, W2, label_num, hidden_layer_size, layer3_error, z2);
    		accumulate_gradient(grad2_sum, layer3_error, label_num, hidden_layer_size, a2);
    		accumulate_gradient(grad1_sum, layer2_error, hidden_layer_size, feature_number, X[i]);
    	}
    	for (int i = 0; i < hidden_layer_size; i++){
    		for (int j = 0; j <= feature_number; j++){
    			w1_grad[i][j] = grad1_sum[i][j] / sample_number;
    		}
    	}
    	for (int i = 0; i < label_num; i++){
    		for (int j = 0; j <= hidden_layer_size; j++){
    			w2_grad[i][j] = grad2_sum[i][j] / sample_number;
    		}
    	}
    }
    
    int main(){
    	double X[][MAX_FEATURE_DIMENSION] = {
    		{0, 0.084147, 0.090930},
    		{0, 0.090930, 0.065699},
    		{0, 2, 3}
    	};
    	int y[] = {1, 2, 2};
    	int hidden_layer_size = 4;
    	int label_num = 2;
    	int feature_number = 2;
    	int sample_number = 3;
    	double W1[][MAX_FEATURE_DIMENSION] = {
    		{0.084147, -0.027942, -0.099999},
    		{0.090930, 0.065699, -0.053657},
    		{0.014112, 0.098936, 0.042017},
    		{-0.075680, 0.041212, 0.099061},
    	};
    	double W2[][MAX_FEATURE_DIMENSION] = {
    		{0.084147, -0.075680, 0.065699, -0.054402, 0.042017},
    		{0.090930, -0.095892, 0.098936, -0.099999, 0.099061}
    	};
    	double a2[10][MAX_FEATURE_DIMENSION] = {0};
    	double a3[10][MAX_FEATURE_DIMENSION] = {0};
    
    	double w1_grad[10][MAX_FEATURE_DIMENSION] = {0};
    	double w2_grad[10][MAX_FEATURE_DIMENSION] = {0};
    
    	compute_gradient(X, y, feature_number, 3, W1,
    					hidden_layer_size, W2, label_num, w1_grad, w2_grad);
    
    	printf("w1_grad:
    ");
    	for (int i = 0; i < hidden_layer_size; i++){
    		for (int j = 0; j <= feature_number; j++){
    			printf("%lf ", w1_grad[i][j]);
    		}
    		printf("
    ");
    	}
    
    	printf("w2_grad:
    ");
    	for (int i = 0; i < label_num; i++){
    		for (int j = 0; j <= hidden_layer_size; j++){
    			printf("%lf ", w2_grad[i][j]);
    		}
    		printf("
    ");
    	}
    	
    	return 0;
    }
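    The gradients w1_grad and w2_grad computed above would typically feed a plain gradient-descent update (not part of this post; alpha is an assumed learning rate):

    W^{(1)} \leftarrow W^{(1)} - \alpha\,\frac{\partial J}{\partial W^{(1)}}, \qquad
    W^{(2)} \leftarrow W^{(2)} - \alpha\,\frac{\partial J}{\partial W^{(2)}}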

    The execution screenshot from the original post is omitted here.


    Copyright notice: this is an original post by the blog author and may not be reproduced without permission.
