  • CheeseZH: Stanford University: Machine Learning Ex1: Linear Regression

    (1) How to compute the cost function in univariate/multivariate linear regression;

    (2) How to compute batch gradient descent in univariate/multivariate linear regression;

    (3) How to scale features using the mean and standard deviation;

    (4) How to calculate theta with the normal equation (the formulas behind these four points are summarized below).
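
    In symbols, these are the standard formulas that the code below implements (m training examples, hypothesis h_theta(x) = theta' * x):

    J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \big( h_\theta(x^{(i)}) - y^{(i)} \big)^2

    \theta_j := \theta_j - \frac{\alpha}{m} \sum_{i=1}^{m} \big( h_\theta(x^{(i)}) - y^{(i)} \big)\, x_j^{(i)}

    x_j := \frac{x_j - \mu_j}{\sigma_j}

    \theta = (X^T X)^{-1} X^T y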

    Data1 (ex1data1.txt)

    6.1101,17.592
    5.5277,9.1302
    8.5186,13.662
    7.0032,11.854
    5.8598,6.8233
    8.3829,11.886
    7.4764,4.3483
    8.5781,12
    6.4862,6.5987
    5.0546,3.8166
    5.7107,3.2522
    14.164,15.505
    5.734,3.1551
    8.4084,7.2258
    5.6407,0.71618
    5.3794,3.5129
    6.3654,5.3048
    5.1301,0.56077
    6.4296,3.6518
    7.0708,5.3893
    6.1891,3.1386
    20.27,21.767
    5.4901,4.263
    6.3261,5.1875
    5.5649,3.0825
    18.945,22.638
    12.828,13.501
    10.957,7.0467
    13.176,14.692
    22.203,24.147
    5.2524,-1.22
    6.5894,5.9966
    9.2482,12.134
    5.8918,1.8495
    8.2111,6.5426
    7.9334,4.5623
    8.0959,4.1164
    5.6063,3.3928
    12.836,10.117
    6.3534,5.4974
    5.4069,0.55657
    6.8825,3.9115
    11.708,5.3854
    5.7737,2.4406
    7.8247,6.7318
    7.0931,1.0463
    5.0702,5.1337
    5.8014,1.844
    11.7,8.0043
    5.5416,1.0179
    7.5402,6.7504
    5.3077,1.8396
    7.4239,4.2885
    7.6031,4.9981
    6.3328,1.4233
    6.3589,-1.4211
    6.2742,2.4756
    5.6397,4.6042
    9.3102,3.9624
    9.4536,5.4141
    8.8254,5.1694
    5.1793,-0.74279
    21.279,17.929
    14.908,12.054
    18.959,17.054
    7.2182,4.8852
    8.2951,5.7442
    10.236,7.7754
    5.4994,1.0173
    20.341,20.992
    10.136,6.6799
    7.3345,4.0259
    6.0062,1.2784
    7.2259,3.3411
    5.0269,-2.6807
    6.5479,0.29678
    7.5386,3.8845
    5.0365,5.7014
    10.274,6.7526
    5.1077,2.0576
    5.7292,0.47953
    5.1884,0.20421
    6.3557,0.67861
    9.7687,7.5435
    6.5159,5.3436
    8.5172,4.2415
    9.1802,6.7981
    6.002,0.92695
    5.5204,0.152
    5.0594,2.8214
    5.7077,1.8451
    7.6366,4.2959
    5.8707,7.2029
    5.3054,1.9869
    8.2934,0.14454
    13.394,9.0551
    5.4369,0.61705

    1. ex1.m

    %% Machine Learning Online Class - Exercise 1: Linear Regression

    %  Instructions
    %  ------------
    %
    %  This file contains code that helps you get started on the
    %  linear exercise. You will need to complete the following functions
    %  in this exercise:
    %
    %     warmUpExercise.m
    %     plotData.m
    %     gradientDescent.m
    %     computeCost.m
    %     gradientDescentMulti.m
    %     computeCostMulti.m
    %     featureNormalize.m
    %     normalEqn.m
    %
    %  For this exercise, you will not need to change any code in this file,
    %  or any other files other than those mentioned above.
    %
    % x refers to the population size in 10,000s
    % y refers to the profit in $10,000s
    %

    %% Initialization
    clear ; close all; clc

    %% ==================== Part 1: Basic Function ====================
    % Complete warmUpExercise.m
    fprintf('Running warmUpExercise ... \n');
    fprintf('5x5 Identity Matrix: \n');
    warmUpExercise()

    fprintf('Program paused. Press enter to continue.\n');
    pause;


    %% ======================= Part 2: Plotting =======================
    fprintf('Plotting Data ...\n')
    data = load('ex1data1.txt');
    X = data(:, 1); y = data(:, 2);
    m = length(y); % number of training examples

    % Plot Data
    % Note: You have to complete the code in plotData.m
    plotData(X, y);

    fprintf('Program paused. Press enter to continue.\n');
    pause;

    %% =================== Part 3: Gradient descent ===================
    fprintf('Running Gradient Descent ...\n')

    X = [ones(m, 1), data(:,1)]; % Add a column of ones to x
    theta = zeros(2, 1); % initialize fitting parameters

    % Some gradient descent settings
    iterations = 1500;
    alpha = 0.01;

    % compute and display initial cost
    computeCost(X, y, theta)

    % run gradient descent
    theta = gradientDescent(X, y, theta, alpha, iterations);

    % print theta to screen
    fprintf('Theta found by gradient descent: ');
    fprintf('%f %f \n', theta(1), theta(2));

    % Plot the linear fit
    hold on; % keep previous plot visible
    plot(X(:,2), X*theta, '-')
    legend('Training data', 'Linear regression')
    hold off % don't overlay any more plots on this figure

    % Predict values for population sizes of 35,000 and 70,000
    predict1 = [1, 3.5] * theta;
    fprintf('For population = 35,000, we predict a profit of %f\n', ...
        predict1*10000);
    predict2 = [1, 7] * theta;
    fprintf('For population = 70,000, we predict a profit of %f\n', ...
        predict2*10000);

    fprintf('Program paused. Press enter to continue.\n');
    pause;

    %% ============= Part 4: Visualizing J(theta_0, theta_1) =============
    fprintf('Visualizing J(theta_0, theta_1) ...\n')

    % Grid over which we will calculate J
    theta0_vals = linspace(-10, 10, 100);
    theta1_vals = linspace(-1, 4, 100);

    % initialize J_vals to a matrix of 0's
    J_vals = zeros(length(theta0_vals), length(theta1_vals));

    % Fill out J_vals
    for i = 1:length(theta0_vals)
        for j = 1:length(theta1_vals)
            t = [theta0_vals(i); theta1_vals(j)];
            J_vals(i,j) = computeCost(X, y, t);
        end
    end

    % Because of the way meshgrids work in the surf command, we need to
    % transpose J_vals before calling surf, or else the axes will be flipped
    J_vals = J_vals';

    % Surface plot
    figure;
    surf(theta0_vals, theta1_vals, J_vals)
    xlabel('\theta_0'); ylabel('\theta_1');

    % Contour plot
    figure;
    % Plot J_vals as 20 contours spaced logarithmically between 0.01 and 1000
    contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20))
    xlabel('\theta_0'); ylabel('\theta_1');
    hold on;
    plot(theta(1), theta(2), 'rx', 'MarkerSize', 10, 'LineWidth', 2);
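
    ex1.m calls plotData(X, y) in Part 2, but plotData.m is not included in this post. A minimal sketch of that function (marker style and axis labels follow the exercise handout; adjust as needed):

    function plotData(x, y)
    %PLOTDATA Plots the data points x and y into a new figure
    figure;                                  % open a new figure window
    plot(x, y, 'rx', 'MarkerSize', 10);      % plot the data as red crosses
    ylabel('Profit in $10,000s');            % label the y-axis
    xlabel('Population of City in 10,000s'); % label the x-axis
    end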

    2. warmUpExercise.m

    function A = warmUpExercise()
    %WARMUPEXERCISE Example function in octave
    %   A = WARMUPEXERCISE() is an example function that returns the 5x5 identity matrix

    A = [];
    % ============= YOUR CODE HERE ==============
    % Instructions: Return the 5x5 identity matrix
    %               In octave, we return values by defining which variables
    %               represent the return values (at the top of the file)
    %               and then set them accordingly.
    A = eye(5);

    % ===========================================

    end

    3. computeCost.m

    function J = computeCost(X, y, theta)
    %COMPUTECOST Compute cost for linear regression
    %   J = COMPUTECOST(X, y, theta) computes the cost of using theta as the
    %   parameter for linear regression to fit the data points in X and y

    % Initialize some useful values
    m = length(y); % number of training examples

    % You need to return the following variables correctly
    J = 0;

    % ====================== YOUR CODE HERE ======================
    % Instructions: Compute the cost of a particular choice of theta
    %               You should set J to the cost.
    hypothesis = X*theta;
    J = 1/(2*m)*(sum((hypothesis-y).^2));

    % =========================================================================

    end
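
    A quick sanity check for computeCost (assuming ex1data1.txt, i.e. Data1 above, is in the working directory) is to evaluate it at theta = [0; 0]; for this dataset the initial cost should come out to roughly 32.07:

    data = load('ex1data1.txt');
    X = [ones(size(data, 1), 1), data(:, 1)];   % add the intercept column
    y = data(:, 2);
    J = computeCost(X, y, [0; 0])               % expect a value of about 32.07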

    4. gradientDescent.m

    function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters)
    %GRADIENTDESCENT Performs gradient descent to learn theta
    %   theta = GRADIENTDESCENT(X, y, theta, alpha, num_iters) updates theta by
    %   taking num_iters gradient steps with learning rate alpha

    % Initialize some useful values
    m = length(y); % number of training examples
    J_history = zeros(num_iters, 1);

    for iter = 1:num_iters

        % ====================== YOUR CODE HERE ======================
        % Instructions: Perform a single gradient step on the parameter vector
        %               theta.
        %
        % Hint: While debugging, it can be useful to print out the values
        %       of the cost function (computeCost) and gradient here.
        %
        hypothesis = X*theta;
        delta = X'*(hypothesis-y);
        theta = theta - alpha/m*delta;

        % ============================================================

        % Save the cost J in every iteration
        J_history(iter) = computeCost(X, y, theta);

    end

    end
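
    Because gradientDescent also returns J_history, it is easy to check that the cost decreases on every iteration (if it ever increases, alpha is too large). A small check, assuming X (with the column of ones) and y from ex1.m are in the workspace:

    [theta, J_history] = gradientDescent(X, y, zeros(2, 1), 0.01, 1500);
    figure;
    plot(1:numel(J_history), J_history, '-b', 'LineWidth', 2);  % should decrease monotonically
    xlabel('Number of iterations');
    ylabel('Cost J');
    % for Data1 this run should end up near theta = [-3.63; 1.17]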

    Data2 (ex1data2.txt)

    2104,3,399900
    1600,3,329900
    2400,3,369000
    1416,2,232000
    3000,4,539900
    1985,4,299900
    1534,3,314900
    1427,3,198999
    1380,3,212000
    1494,3,242500
    1940,4,239999
    2000,3,347000
    1890,3,329999
    4478,5,699900
    1268,3,259900
    2300,4,449900
    1320,2,299900
    1236,3,199900
    2609,4,499998
    3031,4,599000
    1767,3,252900
    1888,2,255000
    1604,3,242900
    1962,4,259900
    3890,3,573900
    1100,3,249900
    1458,3,464500
    2526,3,469000
    2200,3,475000
    2637,3,299900
    1839,2,349900
    1000,1,169900
    2040,4,314900
    3137,3,579900
    1811,4,285900
    1437,3,249900
    1239,3,229900
    2132,4,345000
    4215,4,549000
    2162,4,287000
    1664,2,368500
    2238,3,329900
    2567,4,314000
    1200,3,299000
    852,2,179900
    1852,4,299900
    1203,3,239500

    0. ex1_multi.m

    %% Machine Learning Online Class
    %  Exercise 1: Linear regression with multiple variables
    %
    %  Instructions
    %  ------------
    %
    %  This file contains code that helps you get started on the
    %  linear regression exercise.
    %
    %  You will need to complete the following functions in this
    %  exercise:
    %
    %     warmUpExercise.m
    %     plotData.m
    %     gradientDescent.m
    %     computeCost.m
    %     gradientDescentMulti.m
    %     computeCostMulti.m
    %     featureNormalize.m
    %     normalEqn.m
    %
    %  For this part of the exercise, you will need to change some
    %  parts of the code below for various experiments (e.g., changing
    %  learning rates).
    %

    %% Initialization

    %% ================ Part 1: Feature Normalization ================

    %% Clear and Close Figures
    clear ; close all; clc

    fprintf('Loading data ...\n');

    %% Load Data
    data = load('ex1data2.txt');
    X = data(:, 1:2);
    y = data(:, 3);
    m = length(y);

    % Print out some data points
    fprintf('First 10 examples from the dataset: \n');
    fprintf(' x = [%.0f %.0f], y = %.0f \n', [X(1:10,:) y(1:10,:)]');

    fprintf('Program paused. Press enter to continue.\n');
    pause;

    % Scale features and set them to zero mean
    fprintf('Normalizing Features ...\n');

    [X mu sigma] = featureNormalize(X);

    % Add intercept term to X
    X = [ones(m, 1) X];


    %% ================ Part 2: Gradient Descent ================

    % ====================== YOUR CODE HERE ======================
    % Instructions: We have provided you with the following starter
    %               code that runs gradient descent with a particular
    %               learning rate (alpha).
    %
    %               Your task is to first make sure that your functions -
    %               computeCost and gradientDescent already work with
    %               this starter code and support multiple variables.
    %
    %               After that, try running gradient descent with
    %               different values of alpha and see which one gives
    %               you the best result.
    %
    %               Finally, you should complete the code at the end
    %               to predict the price of a 1650 sq-ft, 3 br house.
    %
    % Hint: By using the 'hold on' command, you can plot multiple
    %       graphs on the same figure.
    %
    % Hint: At prediction, make sure you do the same feature normalization.
    %

    fprintf('Running gradient descent ...\n');

    % Choose some alpha value
    alpha = 0.01;
    num_iters = 400;

    % Init Theta and Run Gradient Descent
    theta = zeros(3, 1);
    [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters);

    % Plot the convergence graph
    figure;
    plot(1:numel(J_history), J_history, '-b', 'LineWidth', 2);
    xlabel('Number of iterations');
    ylabel('Cost J');

    % Display gradient descent's result
    fprintf('Theta computed from gradient descent: \n');
    fprintf(' %f \n', theta);
    fprintf('\n');

    % Estimate the price of a 1650 sq-ft, 3 br house
    % ====================== YOUR CODE HERE ======================
    % Recall that the first column of X is all-ones. Thus, it does
    % not need to be normalized.
    price = 0; % You should change this


    % ============================================================

    fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ...
             '(using gradient descent):\n $%f\n'], price);

    fprintf('Program paused. Press enter to continue.\n');
    pause;

    %% ================ Part 3: Normal Equations ================

    fprintf('Solving with normal equations...\n');

    % ====================== YOUR CODE HERE ======================
    % Instructions: The following code computes the closed form
    %               solution for linear regression using the normal
    %               equations. You should complete the code in
    %               normalEqn.m
    %
    %               After doing so, you should complete this code
    %               to predict the price of a 1650 sq-ft, 3 br house.
    %

    %% Load Data
    data = csvread('ex1data2.txt');
    X = data(:, 1:2);
    y = data(:, 3);
    m = length(y);

    % Add intercept term to X
    X = [ones(m, 1) X];

    % Calculate the parameters from the normal equation
    theta = normalEqn(X, y);

    % Display normal equation's result
    fprintf('Theta computed from the normal equations: \n');
    fprintf(' %f \n', theta);
    fprintf('\n');


    % Estimate the price of a 1650 sq-ft, 3 br house
    % ====================== YOUR CODE HERE ======================
    price = 0; % You should change this


    % ============================================================

    fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ...
             '(using normal equations):\n $%f\n'], price);
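
    Both price predictions in ex1_multi.m are left at price = 0 (they sit inside "YOUR CODE HERE" sections). A possible completion, assuming mu, sigma, and the theta produced just above each prediction are still in scope:

    % Gradient-descent version: apply the SAME mu/sigma used for training
    x_house = ([1650, 3] - mu) ./ sigma;   % normalize the raw features
    price = [1, x_house] * theta;          % prepend the intercept term, then predict

    % Normal-equation version: no normalization was applied, so use raw features
    price = [1, 1650, 3] * theta;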

    1. featureNormalize.m

    function [X_norm, mu, sigma] = featureNormalize(X)
    %FEATURENORMALIZE Normalizes the features in X
    %   FEATURENORMALIZE(X) returns a normalized version of X where
    %   the mean value of each feature is 0 and the standard deviation
    %   is 1. This is often a good preprocessing step to do when
    %   working with learning algorithms.

    % You need to set these values correctly
    X_norm = X;
    mu = zeros(1, size(X, 2));
    sigma = zeros(1, size(X, 2));

    % ====================== YOUR CODE HERE ======================
    % Instructions: First, for each feature dimension, compute the mean
    %               of the feature and subtract it from the dataset,
    %               storing the mean value in mu. Next, compute the
    %               standard deviation of each feature and divide
    %               each feature by its standard deviation, storing
    %               the standard deviation in sigma.
    %
    %               Note that X is a matrix where each column is a
    %               feature and each row is an example. You need
    %               to perform the normalization separately for
    %               each feature.
    %
    % Hint: You might find the 'mean' and 'std' functions useful.
    %
    mu = mean(X);
    sigma = std(X);
    X_norm = (X_norm - mu) ./ sigma;

    % ============================================================

    end
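
    A quick check that the normalization does what the comments describe (assuming X holds the two feature columns of Data2): each column of X_norm should have mean approximately 0 and standard deviation approximately 1.

    [X_norm, mu, sigma] = featureNormalize(X);
    mean(X_norm)   % expect values very close to [0 0]
    std(X_norm)    % expect values very close to [1 1]

    Note that mu and sigma must be kept and reused to normalize any new input at prediction time, as the hint in ex1_multi.m points out.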

    2. computeCostMulti.m

    function J = computeCostMulti(X, y, theta)
    %COMPUTECOSTMULTI Compute cost for linear regression with multiple variables
    %   J = COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the
    %   parameter for linear regression to fit the data points in X and y

    % Initialize some useful values
    m = length(y); % number of training examples

    % You need to return the following variables correctly
    J = 0;

    % ====================== YOUR CODE HERE ======================
    % Instructions: Compute the cost of a particular choice of theta
    %               You should set J to the cost.
    hypothesis = X*theta;
    J = 1/(2*m)*(sum((hypothesis-y).^2));

    % =========================================================================

    end
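
    The sum-based cost above already works for any number of features; an equivalent, fully vectorized form (the variant usually suggested for the multivariate case) replaces the explicit sum with a vector product:

    err = X*theta - y;          % m x 1 vector of residuals
    J = (err' * err) / (2*m);   % same value as the sum-based version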

    3. gradientDescentMulti.m

    function [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters)
    %GRADIENTDESCENTMULTI Performs gradient descent to learn theta
    %   theta = GRADIENTDESCENTMULTI(x, y, theta, alpha, num_iters) updates theta by
    %   taking num_iters gradient steps with learning rate alpha

    % Initialize some useful values
    m = length(y); % number of training examples
    J_history = zeros(num_iters, 1);

    for iter = 1:num_iters

        % ====================== YOUR CODE HERE ======================
        % Instructions: Perform a single gradient step on the parameter vector
        %               theta.
        %
        % Hint: While debugging, it can be useful to print out the values
        %       of the cost function (computeCostMulti) and gradient here.
        %
        hypothesis = X*theta;
        delta = X'*(hypothesis-y);
        theta = theta - alpha/m*delta;

        % ============================================================

        % Save the cost J in every iteration
        J_history(iter) = computeCostMulti(X, y, theta);

    end

    end
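
    ex1_multi.m asks you to try several learning rates; a small sketch of that experiment (the alpha values here are just examples, and X is assumed to be the normalized design matrix with the intercept column from ex1_multi.m):

    alphas = [0.3, 0.1, 0.03, 0.01];   % candidate learning rates
    figure; hold on;
    for k = 1:numel(alphas)
        [~, J_hist] = gradientDescentMulti(X, y, zeros(3, 1), alphas(k), 50);
        plot(1:numel(J_hist), J_hist, 'LineWidth', 2);  % one convergence curve per alpha
    end
    xlabel('Number of iterations'); ylabel('Cost J');
    legend('alpha = 0.3', 'alpha = 0.1', 'alpha = 0.03', 'alpha = 0.01');
    hold off;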

    4. normalEqn.m

    function [theta] = normalEqn(X, y)
    %NORMALEQN Computes the closed-form solution to linear regression
    %   NORMALEQN(X,y) computes the closed-form solution to linear
    %   regression using the normal equations.

    theta = zeros(size(X, 2), 1);

    % ====================== YOUR CODE HERE ======================
    % Instructions: Complete the code to compute the closed form solution
    %               to linear regression and put the result in theta.
    %

    % ---------------------- Sample Solution ----------------------

    theta = pinv(X'*X)*X'*y;

    % -------------------------------------------------------------

    % ============================================================

    end
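
    pinv is used here because it still returns a reasonable theta when X'*X is singular or nearly singular (e.g., redundant or linearly dependent features). When X'*X is well-conditioned, the backslash operator is the more common idiom and gives the same result:

    theta = (X' * X) \ (X' * y);   % equivalent to pinv(X'*X)*X'*y when X'*X is invertible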
  • Original post: https://www.cnblogs.com/CheeseZH/p/4597006.html