zoukankan      html  css  js  c++  java
  • Reducing the Dimensionality of Data with Neural Networks / A Fast Learning Algorithm for Deep Belief Nets

    Deeplearning原文作者Hinton代码注解

      1 Matlab示例代码为两部分,分别对应不同的论文:
      2 
      3 1. Reducing the Dimensionality of data with neural networks 
      4 
      5   mnistdeepauto.m   backprop.m   rbmhidlinear.m
      6 
      7 2. A fast learning algorithm for deep belief nets
      8 
      9   mnistclassify.m   backpropclassify.m  
     10 
     11  其余部分代码通用。
     12 
     13 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     14 mnistclassify.m
     15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     16 
     % mnistclassify.m -- driver script.
     %
     % Greedily pretrains a stack of three RBMs (numdims-500, 500-500,
     % 500-2000), saves each layer's weights and biases to a .mat file, and
     % then hands over to backpropclassify for discriminative fine-tuning.
     %
     % This is a script, not a function: it communicates with the scripts it
     % invokes (converter, makebatches, rbm, backpropclassify) through the
     % shared workspace, so the variable names below must not be renamed.

     clear all
     close all

     maxepoch = 50;                          % number of pretraining epochs per RBM (read by rbm)
     numhid = 500; numpen = 500; numpen2 = 2000;   % hidden-layer sizes of RBM 1, 2 and 3

     fprintf(1,'Converting Raw files into Matlab format \n');
     converter;                              % external script; presumably writes the MNIST data as .mat files -- TODO confirm

     fprintf(1,'Pretraining a deep autoencoder. \n');
     fprintf(1,'The Science paper used 50 epochs. This uses %3i \n', maxepoch);

     makebatches;                            % external script; expected to define batchdata (used on the next line)
     % batchdata is indexed below as (case, dimension, batch):
     %   numcases   - cases per batch
     %   numdims    - dimensionality of one case
     %   numbatches - number of batches
     [numcases, numdims, numbatches] = size(batchdata);

     % ---- Layer 1: input layer -> first hidden layer ----
     fprintf(1,'Pretraining Layer 1 with RBM: %d-%d \n',numdims,numhid);
     restart = 1;                            % tells rbm to (re)initialise its parameters
     rbm;                                    % trains on batchdata; leaves vishid, hidbiases, visbiases, batchposhidprobs
     hidrecbiases = hidbiases;               % keep the hidden biases under a layer-specific name
     save mnistvhclassify vishid hidrecbiases visbiases;

     % ---- Layer 2: first hidden layer -> second hidden layer ----
     fprintf(1,'\nPretraining Layer 2 with RBM: %d-%d \n',numhid,numpen);
     batchdata = batchposhidprobs;           % hidden probabilities of the previous RBM become the new input
     numhid = numpen;                        % only the hidden size is set; rbm reads the input size from batchdata
     restart = 1;
     rbm;
     hidpen = vishid; penrecbiases = hidbiases; hidgenbiases = visbiases;
     save mnisthpclassify hidpen penrecbiases hidgenbiases;

     % ---- Layer 3: second hidden layer -> third hidden layer ----
     fprintf(1,'\nPretraining Layer 3 with RBM: %d-%d \n',numpen,numpen2);
     batchdata = batchposhidprobs;
     numhid = numpen2;
     restart = 1;
     rbm;
     hidpen2 = vishid; penrecbiases2 = hidbiases; hidgenbiases2 = visbiases;
     save mnisthp2classify hidpen2 penrecbiases2 hidgenbiases2;

     % Fine-tune the whole network as a classifier.
     backpropclassify;
     58 
     59  
     60 
     61 
     62 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     63 backpropclassify.m
     64 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % backpropclassify.m -- discriminative fine-tuning script.
     %
     % Loads the three pretrained RBM layers, stacks a randomly initialised
     % softmax layer on top, and for maxepoch epochs:
     %   1. reports the misclassification count and cross-entropy on the
     %      training and test batches,
     %   2. runs conjugate gradient (minimize) on groups of 10 mini-batches:
     %      epochs 1-5 update only the softmax weights (CG_CLASSIFY_INIT),
     %      later epochs update all weights (CG_CLASSIFY),
     %   3. saves the current weights and error curves.
     %
     % Relies on names provided elsewhere: makebatches (expected to define
     % batchdata, batchtargets, testbatchdata, testbatchtargets), minimize,
     % CG_CLASSIFY_INIT and CG_CLASSIFY.

     maxepoch = 200;
     fprintf(1,'\nTraining discriminative model on MNIST by minimizing cross entropy error. \n');
     fprintf(1,'60 batches of 1000 cases each. \n');

     % Pretrained weights and biases of the three layers (the files written
     % by the pretraining script).
     load mnistvhclassify
     load mnisthpclassify
     load mnisthp2classify

     makebatches;
     [numcases, numdims, numbatches] = size(batchdata);
     N = numcases;                           % cases per batch

     %%%% PREINITIALIZE WEIGHTS OF THE DISCRIMINATIVE MODEL %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

     % Each matrix carries its layer's biases as an extra last row, so a
     % layer is evaluated as [activations 1] * w.
     w1 = [vishid; hidrecbiases];
     w2 = [hidpen; penrecbiases];
     w3 = [hidpen2; penrecbiases2];
     w_class = 0.1*randn(size(w3,2)+1,10);   % softmax layer: (top-layer units + 1 bias row) x 10 labels

     %%%%%%%%%% END OF PREINITIALIZATION OF WEIGHTS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

     % Unit counts per layer (rows of each weight matrix minus the bias row).
     l1 = size(w1,1)-1;
     l2 = size(w2,1)-1;
     l3 = size(w3,1)-1;
     l4 = size(w_class,1)-1;                 % units in the top hidden layer
     l5 = 10;                                % units in the label layer
     test_err = [];
     train_err = [];


     for epoch = 1:maxepoch

       %%%%%%%%%%%%%%%%%%%% COMPUTE TRAINING MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       err = 0;
       err_cr = 0;                           % accumulated cross-entropy
       counter = 0;                          % number of correctly classified cases
       [numcases, numdims, numbatches] = size(batchdata);
       N = numcases;                         % reset: the CG section below overwrites N
       for batch = 1:numbatches
         data = [batchdata(:,:,batch)];
         target = [batchtargets(:,:,batch)];
         data = [data ones(N,1)];            % constant-1 column multiplies the bias row of w1
         % Forward pass through the three logistic layers.
         w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs ones(N,1)];
         w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
         w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)];

         % Softmax over the label units: exponentiate the total input of
         % each label unit, then normalise each row to sum to 1.
         targetout = exp(w3probs*w_class);
         targetout = targetout./repmat(sum(targetout,2),1,l5);

         [I, J] = max(targetout,[],2);       % J: predicted label index per case
         [I1, J1] = max(target,[],2);        % J1: true label index per case
         counter = counter + sum(J==J1);
         err_cr = err_cr - sum(sum( target.*log(targetout) ));
       end
       train_err(epoch) = (numcases*numbatches-counter);   % total number of misclassified training cases
       train_crerr(epoch) = err_cr/numbatches;             % mean cross-entropy per batch

       %%%%%%%%%%%%%% END OF COMPUTING TRAINING MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

       %%%%%%%%%%%%%%%%%%%% COMPUTE TEST MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       err = 0;
       err_cr = 0;
       counter = 0;
       [testnumcases, testnumdims, testnumbatches] = size(testbatchdata);

       N = testnumcases;
       for batch = 1:testnumbatches
         data = [testbatchdata(:,:,batch)];
         target = [testbatchtargets(:,:,batch)];
         data = [data ones(N,1)];
         w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs ones(N,1)];
         w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
         w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)];
         targetout = exp(w3probs*w_class);
         targetout = targetout./repmat(sum(targetout,2),1,l5);

         [I, J] = max(targetout,[],2);
         [I1, J1] = max(target,[],2);
         counter = counter + sum(J==J1);
         err_cr = err_cr - sum(sum( target.*log(targetout) ));
       end
       test_err(epoch) = (testnumcases*testnumbatches-counter);
       test_crerr(epoch) = err_cr/testnumbatches;
       fprintf(1,'Before epoch %d Train # misclassified: %d (from %d). Test # misclassified: %d (from %d) \t \t \n',...
               epoch,train_err(epoch),numcases*numbatches,test_err(epoch),testnumcases*testnumbatches);

       %%%%%%%%%%%%%% END OF COMPUTING TEST MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

       tt = 0;
       for batch = 1:numbatches/10
         % NOTE(review): the page this listing came from shows only a line
         % break after this message; '\r' (overwrite the progress line in
         % place) follows the original code release -- confirm.
         fprintf(1,'epoch %d batch %d\r',epoch,batch);

         %%%%%%%%%%% COMBINE 10 MINIBATCHES INTO 1 LARGER MINIBATCH %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
         % Ten consecutive mini-batches are stacked row-wise into one larger
         % batch that is handed to conjugate gradient.
         tt = tt+1;
         data = [];
         targets = [];
         for kk = 1:10
           data = [data; batchdata(:,:,(tt-1)*10+kk)];
           targets = [targets; batchtargets(:,:,(tt-1)*10+kk)];
         end

         %%%%%%%%%%%%%%% PERFORM CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
         max_iter = 3;                       % positive length argument to minimize = number of line searches

         if epoch<6  % First update top-level weights holding other weights fixed.
           N = size(data,1);
           XX = [data ones(N,1)];            % constant-1 column for the biases
           w1probs = 1./(1 + exp(-XX*w1)); w1probs = [w1probs ones(N,1)];
           w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
           % No bias column here: CG_CLASSIFY_INIT appends it itself.
           w3probs = 1./(1 + exp(-w2probs*w3));

           % minimize (Carl Rasmussen's conjugate-gradient code) optimises a
           % D-by-1 parameter vector, so the weight matrix is flattened
           % column-wise.
           VV = [w_class(:)']';
           Dim = [l4; l5];                   % sizes of the two layers being trained
           % X  - parameter vector found by the optimiser
           % fX - per the minimize.m header, the function values recorded
           %      during the search (a progress trace, not a derivative)
           % max_iter > 0 limits the number of line searches; a negative
           % value would limit the number of function evaluations instead.
           [X, fX] = minimize(VV,'CG_CLASSIFY_INIT',max_iter,Dim,w3probs,targets);
           w_class = reshape(X,l4+1,l5);     % restore the matrix shape

         else        % Fine-tune all layers jointly.
           VV = [w1(:)' w2(:)' w3(:)' w_class(:)']';   % all weights flattened into one column
           Dim = [l1; l2; l3; l4; l5];
           [X, fX] = minimize(VV,'CG_CLASSIFY',max_iter,Dim,data,targets);

           % Cut the optimised vector back into the four weight matrices;
           % xxx is the running offset into X.
           w1 = reshape(X(1:(l1+1)*l2),l1+1,l2);
           xxx = (l1+1)*l2;
           w2 = reshape(X(xxx+1:xxx+(l2+1)*l3),l2+1,l3);
           xxx = xxx+(l2+1)*l3;
           w3 = reshape(X(xxx+1:xxx+(l3+1)*l4),l3+1,l4);
           xxx = xxx+(l3+1)*l4;
           w_class = reshape(X(xxx+1:xxx+(l4+1)*l5),l4+1,l5);

         end
         %%%%%%%%%%%%%%% END OF CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

       end

       % Checkpoint the weights and error curves after every epoch.
       save mnistclassify_weights w1 w2 w3 w_class
       save mnistclassify_error test_err test_crerr train_err train_crerr;

     end
    221 
    222  
    223 
    224 
    225 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    226 rbm.m
    227 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % rbm.m -- trains one restricted Boltzmann machine with logistic visible
    % and hidden units, using a single reconstruction step per update.
    %
    % This is a script. It reads from the calling workspace:
    %   batchdata - training data indexed as (case, dimension, batch)
    %   numhid    - number of hidden units
    %   maxepoch  - last epoch to run
    %   restart   - 1 to initialise all parameters and start at epoch 1;
    %               otherwise the existing parameters and 'epoch' are reused
    % and leaves behind:
    %   vishid, hidbiases, visbiases - learned weights and biases
    %   batchposhidprobs             - hidden probabilities for every batch,
    %                                  used by the caller as input to the next RBM

    epsilonw      = 0.1;    % Learning rate for weights
    epsilonvb     = 0.1;    % Learning rate for biases of visible units
    epsilonhb     = 0.1;    % Learning rate for biases of hidden units
    weightcost    = 0.0002; % weight-decay coefficient (see the weight update below)
    initialmomentum = 0.5;  % momentum for epochs 1-5
    finalmomentum   = 0.9;  % momentum from epoch 6 on

    [numcases, numdims, numbatches] = size(batchdata);

    if restart == 1,
      restart = 0;
      epoch = 1;

      % Initializing symmetric weights and biases.
      vishid     = 0.1*randn(numdims, numhid);   % visible-to-hidden weights
      hidbiases  = zeros(1,numhid);
      visbiases  = zeros(1,numdims);

      poshidprobs = zeros(numcases,numhid);      % hidden probabilities given the data
      neghidprobs = zeros(numcases,numhid);      % hidden probabilities given the reconstruction
      posprods    = zeros(numdims,numhid);       % data' * poshidprobs (positive statistics)
      negprods    = zeros(numdims,numhid);       % negdata' * neghidprobs (negative statistics)
      vishidinc  = zeros(numdims,numhid);        % weight increment (carries momentum)
      hidbiasinc = zeros(1,numhid);              % hidden-bias increment
      visbiasinc = zeros(1,numdims);             % visible-bias increment
      batchposhidprobs = zeros(numcases,numhid,numbatches);
    end

    for epoch = epoch:maxepoch,
      % NOTE(review): the page this listing came from shows only a line
      % break after the two progress messages; '\r' follows the original
      % code release -- confirm.
      fprintf(1,'epoch %d\r',epoch);
      errsum = 0;                                % summed squared reconstruction error of this epoch
      for batch = 1:numbatches,
        fprintf(1,'epoch %d batch %d\r',epoch,batch);

        %%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        data = batchdata(:,:,batch);
        poshidprobs = 1./(1 + exp(-data*vishid - repmat(hidbiases,numcases,1)));
        batchposhidprobs(:,:,batch) = poshidprobs;   % overwritten every epoch, so the last epoch's values remain
        posprods  = data' * poshidprobs;
        poshidact = sum(poshidprobs);            % column sums over the cases in the batch
        posvisact = sum(data);

        %%%%%%%%% END OF POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % Sample binary hidden states from their probabilities.
        poshidstates = poshidprobs > rand(numcases,numhid);

        %%%%%%%%% START NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % Reconstruct the visible layer from the sampled hidden states, then
        % recompute the hidden probabilities from that reconstruction.
        negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1)));
        neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1)));
        negprods  = negdata'*neghidprobs;
        neghidact = sum(neghidprobs);
        negvisact = sum(negdata);

        %%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        err = sum(sum( (data-negdata).^2 ));     % squared reconstruction error of this batch
        errsum = err + errsum;

        if epoch>5,
          momentum = finalmomentum;
        else
          momentum = initialmomentum;
        end;

        %%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % Increment = momentum * previous increment
        %           + learning rate * (positive - negative statistics, averaged
        %             over the batch); the weights also get a decay term.
        vishidinc = momentum*vishidinc + ...
                    epsilonw*( (posprods-negprods)/numcases - weightcost*vishid);
        visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact);
        hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact);

        vishid = vishid + vishidinc;
        visbiases = visbiases + visbiasinc;
        hidbiases = hidbiases + hidbiasinc;

        %%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

      end
      fprintf(1, 'epoch %4i error %6.1f \n', epoch, errsum);
    end;
    311 
    312  
    313 
    314 
    315 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    316 CG_CLASSIFY_INIT.M
    317 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    function [f, df] = CG_CLASSIFY_INIT(VV,Dim,w3probs,target)
    % CG_CLASSIFY_INIT  Cross-entropy objective and gradient for the softmax
    % layer only (the layers below are held fixed by the caller).
    %
    %   VV      - softmax weights flattened into a column, (Dim(1)+1)*Dim(2) elements
    %   Dim     - [number of top-layer units; number of label units]
    %   w3probs - N-by-Dim(1) top-layer activations, WITHOUT the bias column
    %   target  - N-by-Dim(2) target matrix
    %
    %   f       - cross-entropy  -sum(sum(target .* log(softmax output)))
    %   df      - gradient of f with respect to VV, as a column vector

    l1 = Dim(1);
    l2 = Dim(2);
    N = size(w3probs,1);

    % Restore the weight matrix; its last row holds the biases.
    w_class = reshape(VV,l1+1,l2);
    w3probs = [w3probs ones(N,1)];          % constant-1 column multiplies the bias row

    % Softmax: exponentiate each label unit's total input and normalise every
    % row to sum to 1. The class count comes from Dim(2) rather than a
    % hard-coded 10, so the function works for any number of labels.
    targetout = exp(w3probs*w_class);
    targetout = targetout./repmat(sum(targetout,2),1,l2);
    f = -sum(sum( target.*log(targetout) ));

    % For softmax outputs with a cross-entropy loss, the derivative with
    % respect to the label units' total input is simply (output - target);
    % no sigmoid-derivative factor is involved.
    IO = (targetout-target);
    Ix_class = IO;
    dw_class = w3probs'*Ix_class;

    df = [dw_class(:)']';
    335 
    336  
    337 
    338  
    339 
    340 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    341 CG_CLASSIFY.M
    342 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    343 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    344 % 该段代码对所有权重进行整体微调
    345 % 各部分过程见 CG_CLASSIFY_INIT.m注解
    346 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    347 function [f, df] = CG_CLASSIFY(VV,Dim,XX,target);
    348 
    349 
    350 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    351 rbmhidlinear.m
    352 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    353 %除了最后计算单元值采用的是线性单元其余过程全部一样
    354 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    355 
    356 复制代码
  • 相关阅读:
    【02】AJAX XMLHttpRequest对象
    【01】什么是AJAX
    NPM是什么
    nodejs npm常用命令
    angular(转)
    s6 传输层
    s6-9 TCP 定时器
    s6-8 TCP 拥塞控制
    s6-7 TCP 传输策略
    s6-6 TCP 连接释放
  • 原文地址:https://www.cnblogs.com/yymn/p/4619335.html
Copyright © 2011-2022 走看看