  • deep learning: DBN source code ----- C++

    The code is adapted from yusugomori's implementation. A DBN is essentially a stack of RBMs, so the implementation has to keep track of which RBM it is currently on: each RBM's hidden layer serves as the input (visible) layer of the next RBM. Fine-tuning then adjusts the model using the available label data together with the output layer computed from the last hidden layer. (Written by a beginner.)
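
    Before diving into the sources, here is a minimal sketch (not part of the original code) of how layer dimensions chain in such a stack; the sizes match the test_dbn demo at the bottom:

    #include <cstdio>

    int main() {
        int n_ins = 6;
        int hidden_layer_sizes[] = {3, 3};
        int n_layers = 2;
        for (int i = 0; i < n_layers; i++) {
            // RBM i's visible layer is either the raw input or the previous hidden layer
            int n_visible = (i == 0) ? n_ins : hidden_layer_sizes[i - 1];
            int n_hidden  = hidden_layer_sizes[i];
            printf("RBM %d: visible=%d, hidden=%d\n", i, n_visible, n_hidden);
        }
        return 0;
    }

    This prints "RBM 0: visible=6, hidden=3" and "RBM 1: visible=3, hidden=3", which is exactly the wiring the DBN constructor below sets up.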

    DBN.h

    class DBN {

    public:
        int N;
        int n_ins;
        int *hidden_layer_sizes;
        int n_outs;
        int n_layers;
        HiddenLayer **sigmoid_layers;
        RBM **rbm_layers;
        LogisticRegression *log_layer;

        DBN(int, int, int*, int, int);
        ~DBN();
        void pretrain(int*, double, int, int);
        void finetune(int*, int*, double, int);
        void predict(int*, double*);
    };

    HiddenLayer.h

    class HiddenLayer {

    public:
        int N;
        int n_in;
        int n_out;
        double **W;
        double *b;

        HiddenLayer(int, int, int, double**, double*);
        ~HiddenLayer();
        double output(int*, double*, double);
        void sample_h_given_v(int*, int*);
    };

    LogisticRegression.h

    class LogisticRegression {

    public:
        int N;
        int n_in;
        int n_out;
        double **W;
        double *b;

        LogisticRegression(int, int, int);
        ~LogisticRegression();
        void train(int*, int*, double);
        void softmax(double*);
        void predict(int*, double*);
    };

    RBM.h

    class RBM {

    public:
        int N;
        int n_visible;
        int n_hidden;
        double **W;
        double *hbias;
        double *vbias;

        RBM(int, int, int, double**, double*, double*);
        ~RBM();
        void contrastive_divergence(int*, double, int);
        void sample_h_given_v(int*, double*, int*);
        void sample_v_given_h(int*, double*, int*);
        double propup(int*, double*, double);
        double propdown(int*, int, double);
        void gibbs_hvh(int*, double*, int*, double*, int*);
        void reconstruct(int*, double*);
    };

    DBN.cpp

    #include <iostream>
    #include <cmath>
    #include <stdlib.h>
    #include "HiddenLayer.h"
    #include "RBM.h"
    #include "LogisticRegression.h"
    #include "DBN.h"
    using namespace std;


    // uniform random double in [min, max)
    double uniform(double min, double max)
    {
        return rand() / (RAND_MAX + 1.0) * (max - min) + min;
    }

    // sample from Binomial(n, p) by counting successes over n Bernoulli trials
    int binomial(int n, double p)
    {
        if(p < 0 || p > 1) return 0;

        int c = 0;
        double r;

        for(int i=0; i<n; i++) {
            r = rand() / (RAND_MAX + 1.0);
            if (r < p) c++;
        }

        return c;
    }

    double sigmoid(double x)
    {
        return 1.0 / (1.0 + exp(-x));
    }

    // DBN
    // construct every layer
    DBN::DBN(int size, int n_i, int *hls, int n_o, int n_l)
    {
        int input_size;

        N = size;
        n_ins = n_i;
        hidden_layer_sizes = hls;
        n_outs = n_o;
        n_layers = n_l;

        sigmoid_layers = new HiddenLayer*[n_layers];
        rbm_layers = new RBM*[n_layers];

        // build each layer; in this example the stack consists of 2 RBMs
        for(int i=0; i<n_layers; i++)
        {
            if(i == 0)
            {
                input_size = n_ins;  // the first RBM sees the raw input
            }
            else
            {
                input_size = hidden_layer_sizes[i-1];  // each later RBM sees the previous hidden layer
            }

            // construct the hidden layer
            sigmoid_layers[i] = new HiddenLayer(N, input_size, hidden_layer_sizes[i], NULL, NULL);

            // construct the RBM, sharing W and the hidden bias with the hidden layer
            rbm_layers[i] = new RBM(N, input_size, hidden_layer_sizes[i], sigmoid_layers[i]->W, sigmoid_layers[i]->b, NULL);
        }

        // the loop above builds a 2-RBM stack; on top sits a logistic regression
        // whose input is the last hidden layer and whose output is the model's output layer
        log_layer = new LogisticRegression(N, hidden_layer_sizes[n_layers-1], n_outs);
    }

    DBN::~DBN()
    {
        delete log_layer;

        for(int i=0; i<n_layers; i++)
        {
            delete sigmoid_layers[i];
            delete rbm_layers[i];
        }
        delete[] sigmoid_layers;
        delete[] rbm_layers;
    }

    // Pretraining: the RBMs are trained one at a time, bottom-up, and each
    // training example has to be propagated up to the layer being trained.
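    // For RBM i, the inner l-loop below first pushes the example through layers
    // 0..i-1 with sample_h_given_v, so layer_input ends up holding a binary
    // sample of RBM i's visible layer before contrastive divergence runs.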
    void DBN::pretrain(int *input, double lr, int k, int epochs)
    {
        int *layer_input = NULL;
        int prev_layer_input_size;
        int *prev_layer_input;

        int *train_X = new int[n_ins];

        for(int i=0; i<n_layers; i++)
        {
            for(int epoch=0; epoch<epochs; epoch++)
            {
                for(int n=0; n<N; n++)
                {
                    for(int m=0; m<n_ins; m++) train_X[m] = input[n * n_ins + m];
                    for(int l=0; l<=i; l++)
                    {
                        if(l == 0)
                        {
                            layer_input = new int[n_ins];
                            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[j];
                        }
                        else
                        {
                            if(l == 1) prev_layer_input_size = n_ins;
                            else prev_layer_input_size = hidden_layer_sizes[l-2];
                            prev_layer_input = new int[prev_layer_input_size];
                            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
                            delete[] layer_input;

                            layer_input = new int[hidden_layer_sizes[l-1]];
                            sigmoid_layers[l-1]->sample_h_given_v(prev_layer_input, layer_input);
                            delete[] prev_layer_input;
                        }
                    }
                    rbm_layers[i]->contrastive_divergence(layer_input, lr, k);
                    delete[] layer_input;  // release the per-example buffer
                }
            }
        }
        delete[] train_X;
    }
    // Fine-tuning: adjust the model according to the labels of the labeled data;
    // the procedure is essentially logistic regression.
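    // Only the top logistic-regression weights are updated here; the pretrained
    // hidden layers stay fixed. (Fuller DBN implementations also backpropagate
    // the error into the sigmoid layers.)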
    void DBN::finetune(int *input, int *label, double lr, int epochs)
    {
        int *layer_input = NULL;
        int *prev_layer_input;

        int *train_X = new int[n_ins];
        int *train_Y = new int[n_outs];

        for(int epoch=0; epoch<epochs; epoch++)
        {
            for(int n=0; n<N; n++)
            {
                for(int m=0; m<n_ins; m++)  train_X[m] = input[n * n_ins + m];
                for(int m=0; m<n_outs; m++) train_Y[m] = label[n * n_outs + m];

                // forward-sample the example through every hidden layer
                for(int i=0; i<n_layers; i++)
                {
                    if(i == 0)
                    {
                        prev_layer_input = new int[n_ins];
                        for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[j];
                    }
                    else
                    {
                        prev_layer_input = new int[hidden_layer_sizes[i-1]];
                        for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
                        delete[] layer_input;
                    }
                    layer_input = new int[hidden_layer_sizes[i]];
                    sigmoid_layers[i]->sample_h_given_v(prev_layer_input, layer_input);
                    delete[] prev_layer_input;
                }

                log_layer->train(layer_input, train_Y, lr);
                delete[] layer_input;  // release the per-example buffer
            }
        }
        delete[] train_X;
        delete[] train_Y;
    }

    void DBN::predict(int *x, double *y)
    {
        double *layer_input;
        double *prev_layer_input;

        double linear_output;
        prev_layer_input = new double[n_ins];
        for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];

        for(int i=0; i<n_layers; i++)
        {
            layer_input = new double[sigmoid_layers[i]->n_out];
            for(int k=0; k<sigmoid_layers[i]->n_out; k++)
            {
                linear_output = 0.0;
                for(int j=0; j<sigmoid_layers[i]->n_in; j++)
                {
                    linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j];
                }
                linear_output += sigmoid_layers[i]->b[k];
                layer_input[k] = sigmoid(linear_output);
            }
            delete[] prev_layer_input;

            if(i < n_layers-1)
            {
                prev_layer_input = new double[sigmoid_layers[i]->n_out];
                for(int j=0; j<sigmoid_layers[i]->n_out; j++) prev_layer_input[j] = layer_input[j];
                delete[] layer_input;
            }
        }

        for(int i=0; i<log_layer->n_out; i++)
        {
            y[i] = 0;
            for(int j=0; j<log_layer->n_in; j++)
            {
                y[i] += log_layer->W[i][j] * layer_input[j];
            }
            y[i] += log_layer->b[i];
        }
        log_layer->softmax(y);
        delete[] layer_input;
    }

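    // Unlike pretraining, predict() propagates the real-valued sigmoid means
    // through each layer instead of binary samples, giving a deterministic
    // forward pass before the final softmax.
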
    // HiddenLayer
    HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp)
    {
        N = size;
        n_in = in;
        n_out = out;

        if(w == NULL)
        {
            W = new double*[n_out];
            for(int i=0; i<n_out; i++) W[i] = new double[n_in];
            double a = 1.0 / n_in;

            for(int i=0; i<n_out; i++)
            {
                for(int j=0; j<n_in; j++)
                {
                    W[i][j] = uniform(-a, a);
                }
            }
        }
        else
        {
            W = w;
        }

        if(bp == NULL)
        {
            b = new double[n_out];
            for(int i=0; i<n_out; i++) b[i] = 0;  // biases start at zero
        }
        else
        {
            b = bp;
        }
    }

    HiddenLayer::~HiddenLayer()
    {
        for(int i=0; i<n_out; i++) delete[] W[i];
        delete[] W;
        delete[] b;
    }

    double HiddenLayer::output(int *input, double *w, double b)
    {
        double linear_output = 0.0;
        for(int j=0; j<n_in; j++)
        {
            linear_output += w[j] * input[j];
        }
        linear_output += b;
        return sigmoid(linear_output);
    }

    // h_i ~ Bernoulli(sigmoid(w_i . v + b_i))
    void HiddenLayer::sample_h_given_v(int *input, int *sample)
    {
        for(int i=0; i<n_out; i++)
        {
            sample[i] = binomial(1, output(input, W[i], b[i]));
        }
    }

    // RBM
    RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb)
    {
        N = size;
        n_visible = n_v;
        n_hidden = n_h;

        if(w == NULL)
        {
            W = new double*[n_hidden];
            for(int i=0; i<n_hidden; i++) W[i] = new double[n_visible];
            double a = 1.0 / n_visible;

            for(int i=0; i<n_hidden; i++)
            {
                for(int j=0; j<n_visible; j++)
                {
                    W[i][j] = uniform(-a, a);
                }
            }
        }
        else
        {
            W = w;
        }

        if(hb == NULL)
        {
            hbias = new double[n_hidden];
            for(int i=0; i<n_hidden; i++) hbias[i] = 0;
        }
        else
        {
            hbias = hb;
        }

        if(vb == NULL)
        {
            vbias = new double[n_visible];
            for(int i=0; i<n_visible; i++) vbias[i] = 0;
        }
        else
        {
            vbias = vb;
        }
    }

    RBM::~RBM()
    {
        // in this program W and hbias are shared with the corresponding
        // HiddenLayer, which owns and frees them; only vbias belongs to the RBM
        delete[] vbias;
    }

    void RBM::contrastive_divergence(int *input, double lr, int k)
    {
        double *ph_mean = new double[n_hidden];
        int *ph_sample = new int[n_hidden];
        double *nv_means = new double[n_visible];
        int *nv_samples = new int[n_visible];
        double *nh_means = new double[n_hidden];
        int *nh_samples = new int[n_hidden];

        /* CD-k */
        sample_h_given_v(input, ph_mean, ph_sample);  // obtain h0

        for(int step=0; step<k; step++)
        {
            if(step == 0)
            {
                gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);  // obtain v1, h1
            }
            else
            {
                gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
            }
        }

        // Update the weights and both bias vectors. In Hinton's CD-k, v0 is the
        // original data x; h0 (ph_mean here) approximates P(h|v0); v1 is x after
        // one reconstruction and approximates P(v|h0); h1 likewise. CD-k mainly
        // computes these three quantities, which approximate the gradient well
        // (as for exactly why, I am not sure).
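        // In symbols, the update loops below implement
        //   W[i][j]  += lr * ( P(h_i=1|v0)*v0_j - P(h_i=1|vk)*vk_j ) / N
        //   hbias[i] += lr * ( h0_i - P(h_i=1|vk) ) / N
        //   vbias[j] += lr * ( v0_j - vk_j ) / N
        // where ph_mean = P(h|v0), ph_sample = h0, nh_means = P(h|vk), nv_samples = vk.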
        for(int i=0; i<n_hidden; i++)
        {
            for(int j=0; j<n_visible; j++)
            {
                // in the weight formula, P(h_i=1|v) is h0, while P(h_i=1|v_k) and v_k play the roles of h1 and v1
                W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
            }
            hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
        }

        for(int i=0; i<n_visible; i++)
        {
            vbias[i] += lr * (input[i] - nv_samples[i]) / N;
        }

        delete[] ph_mean;
        delete[] ph_sample;
        delete[] nv_means;
        delete[] nv_samples;
        delete[] nh_means;
        delete[] nh_samples;
    }

    void RBM::sample_h_given_v(int *v0_sample, double *mean, int *sample)
    {
        for(int i=0; i<n_hidden; i++)
        {
            mean[i] = propup(v0_sample, W[i], hbias[i]);
            sample[i] = binomial(1, mean[i]);
        }
    }

    void RBM::sample_v_given_h(int *h0_sample, double *mean, int *sample)
    {
        for(int i=0; i<n_visible; i++)
        {
            mean[i] = propdown(h0_sample, i, vbias[i]);
            sample[i] = binomial(1, mean[i]);
        }
    }

    // P(h_i = 1 | v)
    double RBM::propup(int *v, double *w, double b)
    {
        double pre_sigmoid_activation = 0.0;
        for(int j=0; j<n_visible; j++)
        {
            pre_sigmoid_activation += w[j] * v[j];
        }
        pre_sigmoid_activation += b;
        return sigmoid(pre_sigmoid_activation);
    }

    // P(v_i = 1 | h)
    double RBM::propdown(int *h, int i, double b)
    {
        double pre_sigmoid_activation = 0.0;
        for(int j=0; j<n_hidden; j++)
        {
            pre_sigmoid_activation += W[j][i] * h[j];
        }
        pre_sigmoid_activation += b;
        return sigmoid(pre_sigmoid_activation);
    }

    // one Gibbs step: h -> v -> h
    void RBM::gibbs_hvh(int *h0_sample, double *nv_means, int *nv_samples, double *nh_means, int *nh_samples)
    {
        sample_v_given_h(h0_sample, nv_means, nv_samples);
        sample_h_given_v(nv_samples, nh_means, nh_samples);
    }

    void RBM::reconstruct(int *v, double *reconstructed_v)
    {
        double *h = new double[n_hidden];
        double pre_sigmoid_activation;

        for(int i=0; i<n_hidden; i++)
        {
            h[i] = propup(v, W[i], hbias[i]);
        }

        for(int i=0; i<n_visible; i++)
        {
            pre_sigmoid_activation = 0.0;
            for(int j=0; j<n_hidden; j++)
            {
                pre_sigmoid_activation += W[j][i] * h[j];
            }
            pre_sigmoid_activation += vbias[i];
            reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
        }
        delete[] h;
    }

    // LogisticRegression
    LogisticRegression::LogisticRegression(int size, int in, int out)
    {
        N = size;
        n_in = in;
        n_out = out;

        W = new double*[n_out];
        for(int i=0; i<n_out; i++) W[i] = new double[n_in];
        b = new double[n_out];

        for(int i=0; i<n_out; i++)
        {
            for(int j=0; j<n_in; j++)
            {
                W[i][j] = 0;
            }
            b[i] = 0;
        }
    }

    LogisticRegression::~LogisticRegression()
    {
        for(int i=0; i<n_out; i++) delete[] W[i];
        delete[] W;
        delete[] b;
    }

    void LogisticRegression::train(int *x, int *y, double lr)
    {
        double *p_y_given_x = new double[n_out];
        double *dy = new double[n_out];

        for(int i=0; i<n_out; i++)
        {
            p_y_given_x[i] = 0;
            for(int j=0; j<n_in; j++)
            {
                p_y_given_x[i] += W[i][j] * x[j];
            }
            p_y_given_x[i] += b[i];
        }
        softmax(p_y_given_x);

        // cross-entropy gradient step: dy = y - P(y|x)
        for(int i=0; i<n_out; i++)
        {
            dy[i] = y[i] - p_y_given_x[i];
            for(int j=0; j<n_in; j++)
            {
                W[i][j] += lr * dy[i] * x[j] / N;
            }
            b[i] += lr * dy[i] / N;
        }

        delete[] p_y_given_x;
        delete[] dy;
    }

    void LogisticRegression::softmax(double *x)
    {
        double max = x[0];
        double sum = 0.0;

        // subtract the max before exponentiating, for numerical stability
        for(int i=0; i<n_out; i++) if(max < x[i]) max = x[i];
        for(int i=0; i<n_out; i++)
        {
            x[i] = exp(x[i] - max);
            sum += x[i];
        }

        for(int i=0; i<n_out; i++) x[i] /= sum;
    }

    void LogisticRegression::predict(int *x, double *y)
    {
        for(int i=0; i<n_out; i++)
        {
            y[i] = 0;
            for(int j=0; j<n_in; j++)
            {
                y[i] += W[i][j] * x[j];
            }
            y[i] += b[i];
        }
        softmax(y);
    }

    void test_dbn()
    {
        srand(0);

        double pretrain_lr = 0.1;
        int pretraining_epochs = 1000;
        int k = 1;
        double finetune_lr = 0.1;
        int finetune_epochs = 500;

        int train_N = 6;
        int test_N = 3;
        int n_ins = 6;
        int n_outs = 2;
        int hidden_layer_sizes[] = {3, 3};
        int n_layers = sizeof(hidden_layer_sizes) / sizeof(hidden_layer_sizes[0]);

        // training data
        int train_X[6][6] = {
            {1, 1, 1, 0, 0, 0},
            {1, 0, 1, 0, 0, 0},
            {1, 1, 1, 0, 0, 0},
            {0, 0, 1, 1, 1, 0},
            {0, 0, 1, 1, 0, 0},
            {0, 0, 1, 1, 1, 0}
        };

        int train_Y[6][2] = {
            {1, 0},
            {1, 0},
            {1, 0},
            {0, 1},
            {0, 1},
            {0, 1}
        };

        // construct the DBN: hidden layers, RBMs, and the logistic regression on top
        DBN dbn(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers);

        // pretraining
        dbn.pretrain(*train_X, pretrain_lr, k, pretraining_epochs);

        // fine-tuning
        dbn.finetune(*train_X, *train_Y, finetune_lr, finetune_epochs);

        // test data
        int test_X[3][6] = {
            {1, 1, 0, 0, 0, 0},
            {0, 0, 0, 1, 1, 0},
            {1, 1, 1, 1, 1, 0}
        };
        double test_Y[3][2];

        // test
        for(int i=0; i<test_N; i++)
        {
            dbn.predict(test_X[i], test_Y[i]);
            for(int j=0; j<n_outs; j++)
            {
                cout << test_Y[i][j] << " ";
            }
            cout << endl;
        }
    }

    int main()
    {
        test_dbn();
        return 0;
    }
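
    Since DBN.cpp #includes the four headers directly, everything builds as a single translation unit; assuming the headers sit next to DBN.cpp, something like g++ DBN.cpp -o dbn should compile the demo. Running it prints the two softmax outputs for each of the three test vectors.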
  • Original link: https://www.cnblogs.com/wn19910213/p/3449848.html