zoukankan      html  css  js  c++  java
  • 【DeepLearning】Exercise: Implement deep networks for digit classification

    Exercise: Implement deep networks for digit classification

    习题链接:Exercise: Implement deep networks for digit classification

    stackedAEPredict.m

    function [pred] = stackedAEPredict(theta, inputSize, hiddenSize, numClasses, netconfig, data)
    % stackedAEPredict: Takes a trained theta and a data set, and returns the
    % predicted label for each example.
    %
    % theta:      unrolled parameter vector; the first hiddenSize*numClasses
    %             entries are the softmax weights, the remainder is the
    %             flattened stack that params2stack rebuilds using netconfig
    % inputSize:  the number of input units (not read by this function)
    % hiddenSize: the number of units in the last hidden layer, i.e. the input
    %             dimension of the softmax classifier
    % numClasses: the number of categories
    % netconfig:  the network configuration, passed through to params2stack
    % data:       matrix containing the examples as columns, so data(:,i) is
    %             the i-th example
    %
    % pred:       1-by-numCases row vector where pred(i) is the index of the
    %             largest softmax score for example i, i.e.
    %             argmax_c P(y = c | x(i)); labels start from 1.

    %% Unroll theta parameter

    % The leading entries hold the softmax weight matrix
    softmaxTheta = reshape(theta(1:hiddenSize*numClasses), numClasses, hiddenSize);

    % Everything after that is the "stack" of hidden layers
    stack = params2stack(theta(hiddenSize*numClasses+1:end), netconfig);

    %% Forward pass through every hidden layer of the stack
    % Looping over numel(stack) keeps this correct for any network depth
    % instead of assuming exactly two hidden layers.
    activation = data;
    for d = 1:numel(stack)
        % bsxfun adds the bias column to every example without replicating it
        activation = sigmoid(bsxfun(@plus, stack{d}.w * activation, stack{d}.b));
    end

    %% Pick the most likely class
    % Softmax is monotonic in the scores, so the arg-max of the raw scores is
    % the arg-max of the class probabilities. The explicit dimension keeps the
    % reduction over classes even when the score matrix has a single row.
    [~, pred] = max(softmaxTheta * activation, [], 1);

    end
    
    
    % You might find this useful
    function sigm = sigmoid(x)
        % Element-wise logistic function: 1 / (1 + e^(-x)).
        % The output has the same size as x.
        denom = 1 + exp(-x);
        sigm = 1 ./ denom;
    end

    stackedAECost.m

    function [ cost, grad ] = stackedAECost(theta, inputSize, hiddenSize, ...
                                                  numClasses, netconfig, ...
                                                  lambda, data, labels)
    % stackedAECost: Takes the unrolled parameters of a stacked autoencoder
    % with a softmax output layer plus a labelled training set, and returns
    % the cost and its gradient. Used for finetuning.
    %
    % theta:      unrolled parameter vector; the first hiddenSize*numClasses
    %             entries are the softmax weights, the remainder is the
    %             flattened stack that params2stack rebuilds using netconfig
    % inputSize:  the number of input units (not read by this function)
    % hiddenSize: the number of units in the last hidden layer, i.e. the input
    %             dimension of the softmax classifier
    % numClasses: the number of categories
    % netconfig:  the network configuration of the stack
    % lambda:     the weight regularization penalty (applied to the softmax
    %             weights only; the stack weights are not regularized here)
    % data:       matrix containing the training data as columns, so
    %             data(:,i) is the i-th training example
    % labels:     vector of labels in 1..numClasses, where labels(i) is the
    %             label for the i-th training example
    %
    % cost:       scalar cost: mean cross-entropy plus softmax weight decay
    % grad:       gradient, unrolled in the same order as theta:
    %             [softmaxThetaGrad(:) ; stack2params(stackgrad)]

    %% Unroll softmaxTheta parameter

    % The leading entries hold the softmax weight matrix
    softmaxTheta = reshape(theta(1:hiddenSize*numClasses), numClasses, hiddenSize);

    % Everything after that is the "stack" of hidden layers
    stack = params2stack(theta(hiddenSize*numClasses+1:end), netconfig);

    numLayers = numel(stack);
    stackgrad = cell(size(stack));

    numCases = size(data, 2);

    % One-hot label matrix. The explicit numClasses-by-numCases size keeps
    % the matrix full height even when the highest class is absent from this
    % batch; without it the subtraction below fails on a size mismatch.
    groundTruth = full(sparse(labels(:), (1:numCases)', 1, numClasses, numCases));

    %% Forward pass, keeping every layer's activation for backpropagation
    % act{1} is the input; act{d+1} is the output of stack layer d.
    act = cell(numLayers + 1, 1);
    act{1} = data;
    for d = 1:numLayers
        act{d+1} = sigmoid(bsxfun(@plus, stack{d}.w * act{d}, stack{d}.b));
    end
    topAct = act{end};

    %% Softmax layer
    % Subtracting the column maximum leaves the softmax unchanged and prevents
    % overflow in exp. Working with log-probabilities avoids log(0), which
    % would otherwise turn the cost into NaN when a probability underflows.
    scores = softmaxTheta * topAct;
    scores = bsxfun(@minus, scores, max(scores, [], 1));
    logProbs = bsxfun(@minus, scores, log(sum(exp(scores), 1)));
    probs = exp(logProbs);
    residual = groundTruth - probs;

    cost = -(1/numCases) * sum(sum(groundTruth .* logProbs)) ...
           + (lambda/2) * sum(sum(softmaxTheta .^ 2));

    % Row c of residual * topAct' is sum_i residual(c,i) * topAct(:,i)', which
    % is what the per-class loop used to accumulate one row at a time.
    softmaxThetaGrad = -(1/numCases) * (residual * topAct') + lambda * softmaxTheta;

    %% Backpropagation through the stack
    % backSignal is the error arriving at the output of layer d, before it is
    % multiplied by the sigmoid derivative a .* (1 - a).
    backSignal = -(softmaxTheta' * residual);
    for d = numLayers:-1:1
        delta = backSignal .* act{d+1} .* (1 - act{d+1});
        stackgrad{d}.w = (delta * act{d}') ./ numCases;
        stackgrad{d}.b = sum(delta, 2) ./ numCases;
        if d > 1
            % Nothing sits below the first layer, so skip the (large)
            % product against the input-layer weights there.
            backSignal = stack{d}.w' * delta;
        end
    end

    %% Roll gradient vector
    grad = [softmaxThetaGrad(:) ; stack2params(stackgrad)];

    end
    
    
    % You might find this useful
    function sigm = sigmoid(x)
        % Logistic squashing function, applied to each element of x.
        expNeg = exp(-x);
        sigm = 1 ./ (1 + expNeg);
    end
    
    function sigmdiff = sigmoiddiff(x)
        % Element-wise derivative of the logistic function:
        % sigmoid(x) .* (1 - sigmoid(x)).
        % The sigmoid is evaluated once and reused rather than being
        % recomputed for each factor.
        s = sigmoid(x);
        sigmdiff = s .* (1 - s);
    end

    stackedAEExercise.m

    %% CS294A/CS294W Stacked Autoencoder Exercise
    
    %  Instructions
    %  ------------
    % 
    %  This file contains code that helps you get started on the
    %  stacked autoencoder exercise. You will need to complete code in
    %  stackedAECost.m and stackedAEPredict.m
    %  You will also need to have implemented sparseAutoencoderCost.m and 
    %  softmaxCost.m from previous exercises. You will need the initializeParameters.m
    %  loadMNISTImages.m, and loadMNISTLabels.m files from previous exercises.
    %  
    %  For the purpose of completing the assignment, you do not need to
    %  change the code in this file. 
    %
    %%======================================================================
    %% STEP 0: Hyper-parameters
    %  Values that let the sparse autoencoders learn good filters; they do
    %  not need to be changed.

    numClasses    = 10;
    inputSize     = 28 * 28;
    hiddenSizeL1  = 200;     % hidden units in layer 1
    hiddenSizeL2  = 200;     % hidden units in layer 2
    beta          = 3;       % weight of the sparsity penalty term
    lambda        = 3e-3;    % weight decay parameter
    sparsityParam = 0.1;     % desired average activation of the hidden units
                             % (the Greek letter rho in the lecture notes,
                             %  which looks like a lower-case "p")

    %%======================================================================
    %% STEP 1: Load the MNIST training set
    %  Reads the training images and their labels from the MNIST files.

    trainLabels = loadMNISTLabels('mnist/train-labels-idx1-ubyte');
    trainData   = loadMNISTImages('mnist/train-images-idx3-ubyte');

    % Labels must start from 1, so digit 0 becomes class 10
    trainLabels(trainLabels == 0) = 10;
    
    %%======================================================================
    %% STEP 2: Train the first sparse autoencoder
    %  Fits the first-layer sparse autoencoder to the training images in
    %  trainData; the labels are not used at this stage. This relies on a
    %  working sparseAutoencoderCost.m. The layer has "hiddenSizeL1" hidden
    %  units and the optimum is stored in sae1OptTheta.

    %  minFunc provides the optimizer
    addpath('minFunc/');

    %  Random starting point for the layer-1 parameters
    sae1Theta = initializeParameters(hiddenSizeL1, inputSize);

    %  Optimizer settings. minFunc needs a function handle that returns two
    %  outputs, the function value and the gradient; sparseAutoencoderCost.m
    %  satisfies this.
    options.Method  = 'lbfgs';   % optimize the cost with L-BFGS
    options.maxIter = 400;       % maximum number of L-BFGS iterations
    options.display = 'on';

    [sae1OptTheta, cost] = minFunc( ...
        @(thetaVec) sparseAutoencoderCost(thetaVec, inputSize, hiddenSizeL1, ...
                                          lambda, sparsityParam, beta, trainData), ...
        sae1Theta, options);

    % -------------------------------------------------------------------------
    
    
    
    %%======================================================================
    %% STEP 2 (continued): Train the second sparse autoencoder
    %  Fits the second sparse autoencoder to the features produced by the
    %  first one. This relies on a working sparseAutoencoderCost.m. The layer
    %  has "hiddenSizeL2" hidden units and an input size of "hiddenSizeL1";
    %  the optimum is stored in sae2OptTheta.

    %  Layer-1 features of the training data
    sae1Features = feedForwardAutoencoder(sae1OptTheta, hiddenSizeL1, ...
                                          inputSize, trainData);

    %  Random starting point for the layer-2 parameters
    sae2Theta = initializeParameters(hiddenSizeL2, hiddenSizeL1);

    %  Optimizer settings. minFunc needs a function handle that returns two
    %  outputs, the function value and the gradient; sparseAutoencoderCost.m
    %  satisfies this.
    options.Method  = 'lbfgs';   % optimize the cost with L-BFGS
    options.maxIter = 400;       % maximum number of L-BFGS iterations
    options.display = 'on';

    [sae2OptTheta, cost] = minFunc( ...
        @(thetaVec) sparseAutoencoderCost(thetaVec, hiddenSizeL1, hiddenSizeL2, ...
                                          lambda, sparsityParam, beta, sae1Features), ...
        sae2Theta, options);

    % -------------------------------------------------------------------------
    
    
    %%======================================================================
    %% STEP 3: Train the softmax classifier
    %  This trains the softmax classifier on the second autoencoder features.
    %  If you've correctly implemented softmaxCost.m, you don't need
    %  to change anything here.

    [sae2Features] = feedForwardAutoencoder(sae2OptTheta, hiddenSizeL2, ...
                                            hiddenSizeL1, sae1Features);

    %  Randomly initialize the parameters
    saeSoftmaxTheta = 0.005 * randn(hiddenSizeL2 * numClasses, 1);


    %% ---------------------- YOUR CODE HERE  ---------------------------------
    %  Instructions: Train the softmax classifier, the classifier takes in
    %                input of dimension "hiddenSizeL2" corresponding to the
    %                hidden layer size of the 2nd layer.
    %
    %                You should store the optimal parameters in saeSoftmaxOptTheta 
    %
    %  NOTE: If you used softmaxTrain to complete this part of the exercise,
    %        set saeSoftmaxOptTheta = softmaxModel.optTheta(:);

    options.Method = 'lbfgs'; % Here, we use L-BFGS to optimize our cost
                              % function. Generally, for minFunc to work, you
                              % need a function pointer with two outputs: the
                              % function value and the gradient. In our problem,
                              % softmaxCost.m satisfies this.
    options.maxIter = 400;    % Same cap the autoencoder stages use; it was
                              % previously inherited silently from the options
                              % struct they left behind.
    options.display = 'on';   % Set on the struct that is actually passed to
                              % minFunc (the old "minFuncOptions" variable was
                              % assigned but never read).

    [saeSoftmaxOptTheta, cost] = minFunc( @(p) softmaxCost(p, ...
                                       numClasses, hiddenSizeL2, lambda, ...
                                       sae2Features, trainLabels), ...
                                  saeSoftmaxTheta, options);

    % -------------------------------------------------------------------------
    
    
    
    %%======================================================================
    %% STEP 5: Finetune softmax model

    % Implement the stackedAECost to give the combined cost of the whole model
    % then run this cell.

    % Initialize the stack using the parameters learned.
    % NOTE(review): the index ranges below assume each autoencoder theta is
    % laid out as [W1(:); W2(:); b1(:); b2(:)], so W1 occupies the first
    % hidden*visible entries and b1 starts after 2*hidden*visible entries.
    % Confirm against initializeParameters / sparseAutoencoderCost.
    stack = cell(2,1);
    stack{1}.w = reshape(sae1OptTheta(1:hiddenSizeL1*inputSize), ...
                         hiddenSizeL1, inputSize);
    stack{1}.b = sae1OptTheta(2*hiddenSizeL1*inputSize+1:2*hiddenSizeL1*inputSize+hiddenSizeL1);
    stack{2}.w = reshape(sae2OptTheta(1:hiddenSizeL2*hiddenSizeL1), ...
                         hiddenSizeL2, hiddenSizeL1);
    stack{2}.b = sae2OptTheta(2*hiddenSizeL2*hiddenSizeL1+1:2*hiddenSizeL2*hiddenSizeL1+hiddenSizeL2);

    % Initialize the parameters for the deep model: softmax weights first,
    % then the flattened stack (the order stackedAECost unrolls them in).
    [stackparams, netconfig] = stack2params(stack);
    stackedAETheta = [ saeSoftmaxOptTheta ; stackparams ];

    %% ---------------------- YOUR CODE HERE  ---------------------------------
    %  Instructions: Train the deep network, hidden size here refers to the
    %                dimension of the input to the classifier, which corresponds 
    %                to "hiddenSizeL2".
    %
    %

    options.Method = 'lbfgs'; % Here, we use L-BFGS to optimize our cost
                              % function. Generally, for minFunc to work, you
                              % need a function pointer with two outputs: the
                              % function value and the gradient. In our problem,
                              % stackedAECost.m satisfies this.
    options.maxIter = 400;    % Same cap the earlier stages use; it was
                              % previously inherited silently from the options
                              % struct they left behind.
    options.display = 'on';   % Set on the struct that is actually passed to
                              % minFunc (the old "minFuncOptions" variable was
                              % assigned but never read).

    [stackedAEOptTheta, cost] = minFunc( @(p) stackedAECost(p, ...
                                       inputSize, hiddenSizeL2, numClasses, ...
                                       netconfig, lambda, trainData, trainLabels), ...
                                  stackedAETheta, options);

    % -------------------------------------------------------------------------
    
    
    
    %%======================================================================
    %% STEP 6: Test 
    %  Instructions: You will need to complete the code in stackedAEPredict.m
    %                before running this part of the code
    %

    % Get labelled test images
    % Note that we apply the same kind of preprocessing as the training set
    testData = loadMNISTImages('mnist/t10k-images-idx3-ubyte');
    testLabels = loadMNISTLabels('mnist/t10k-labels-idx1-ubyte');

    testLabels(testLabels == 0) = 10; % Remap 0 to 10

    % Accuracy with the greedily pre-trained parameters (no finetuning)
    [pred] = stackedAEPredict(stackedAETheta, inputSize, hiddenSizeL2, ...
                              numClasses, netconfig, testData);

    % (:) forces both sides to columns so the comparison is element-wise
    % regardless of whether pred comes back as a row or a column.
    acc = mean(testLabels(:) == pred(:));
    % The format string must stay on one line and end with the \n escape; a
    % literal line break inside the quotes is an unterminated string.
    fprintf('Before Finetuning Test Accuracy: %0.3f%%\n', acc * 100);

    % Accuracy with the finetuned parameters
    [pred] = stackedAEPredict(stackedAEOptTheta, inputSize, hiddenSizeL2, ...
                              numClasses, netconfig, testData);

    acc = mean(testLabels(:) == pred(:));
    fprintf('After Finetuning Test Accuracy: %0.3f%%\n', acc * 100);

    % Accuracy is the proportion of correctly classified images
    % The results for our implementation were:
    %
    % Before Finetuning Test Accuracy: 87.7%
    % After Finetuning Test Accuracy:  97.6%
    %
    % If your values are too low (accuracy less than 95%), you should check 
    % your code for errors, and make sure you are training on the 
    % entire data set of 60000 28x28 training images 
    % (unless you modified the loading code, this should be the case)

    Before Finetuning Test Accuracy: 87.740%
    After Finetuning Test Accuracy: 97.610%

  • 相关阅读:
    ROUTEROS常用命令
    失败团队领导者的10个特征
    一关于C#程序反编译讨论的帖子
    给窗体的任务栏右键菜单增加项目
    C#实现自动填表
    JavaScript实现拷贝图像
    跟踪路由Tracert
    更改软件默认安装目录
    清除右键菜单右打开方式中的项
    程序员的十层楼(1~8层)
  • 原文地址:https://www.cnblogs.com/ganganloveu/p/4216981.html
Copyright © 2011-2022 走看看