zoukankan      html  css  js  c++  java
  • 线性回归-OLS法

    本段代码可实现OLS法的线性回归分析,并可对回归系数做出分析

      1.代码

    %%OLS法下的线性回归
    function prodict = Linear_Regression(X,Y)
    x = sym('x');
    n = max(size(X));
    %%定义画图窗格属性
    h = figure;
    set(h,'color','w');
    %%回归相关值
    XX_s_m = (X-Expection(X,1))*(X-Expection(X,1))';
    XY_s_m = (X-Expection(X,1))*(Y-Expection(Y,1))';
    YY_s_m = (Y-Expection(Y,1))*(Y-Expection(Y,1))';
    %%回归系数
    beta = XY_s_m/XX_s_m;
    alpha = Expection(Y,1)-beta*Expection(X,1);
    %%画出实际值与回归值的差
    yyaxis left
    for k = 1:n
        delta_Y(k) = Y(k) - (alpha+beta*X(k));
    end
    width = (max(X)-min(X))/(2*n);
    b = bar(X,delta_Y,width,'Facecolor',[0.9 0.9 0.9]);
    set(b,'Edgecolor','none');
    %标出差值
    disp('是否标出差值?');
    str = input('yes or no?','s');
    Y_dis = (max(Y) - min(Y))/40;
    if strcmp(str,'yes') == 1
        if n<=10
            for k = 1:length(delta_Y)
                if delta_Y(k)>=0
                    text(X(k),delta_Y(k)+Y_dis,strcat(num2str(delta_Y(k),'%.3f')),...
                        'VerticalAlignment','middle','HorizontalAlignment','center');
                else
                    text(X(k),delta_Y(k)-Y_dis,strcat(num2str(delta_Y(k),'%.3f')),...
                        'VerticalAlignment','middle','HorizontalAlignment','center');
                end
            end
        end
    end
    xlabel('X');ylabel('差值');
    hold on
    %%画出数据点
    yyaxis right
    plot(X,Y,'g--*')
    if n<=20
        grid on;
    end
    hold on
    ylabel('Y');
    %%相关系数
    R2 = (XY_s_m)^2/(XX_s_m*YY_s_m);
    %%随机项方差无偏估计
    sigma_2 = (Y*Y'-n*Expection(Y,1)^2-beta*(X*Y'-n*Expection(X,1)*Expection(Y,1)))/(n-2);
    %%beta和alpha的方差
    Var_beta = sigma_2/(X*X'-n*Expection(X,1)^2);
    Var_alpha = sigma_2*(X*X')/(n*(X*X'-n*Expection(X,1)^2));
    %%beta和alpha的协方差
    cov_alpha_beta = -Expection(X,1)*sigma_2/(X*X'-n*Expection(X,1)^2);
    %%画出回归直线
    x_v = min(X)-Expection(X,1)/sqrt(n):(max(X)-min(X))/100:max(X)+Expection(X,1)/sqrt(n);
    y_v = beta*x_v+alpha;
    y_ave = ones(1,max(size(x_v)))*Expection(Y,1);
    plot(x_v,y_v,'r',x_v,y_ave,'k','LineWidth',0.2,'LineStyle','-')
    %%标出数据点
    disp('是否标出数据点?');
    str = input('yes or no?','s');
    if strcmp(str,'yes') == 1
        if n <=10
            for k = 1:max(size(X))
                text(X(k),Y(k),['(',num2str(X(k)),',',num2str(Y(k)),')'],...
                    'color','b','VerticalAlignment','middle','HorizontalAlignment','center');hold on;
            end
        end
    end
    disp('是否输出回归方程的置信区间?');
    str = input('yes or no?','s');
    if strcmp(str,'yes') == 1
        str =  input('单侧置信系数 或者 双侧置信系数?','s');
        if strcmp(str(1),'单') == 1
            ALPHA = input('输入单侧置信系数:');
            a = 1-ALPHA*2;
        elseif strcmp(str(1),'双') == 1
            ALPHA = input('输入双侧置信系数:');
            a = 1-ALPHA;
        end
        disp('t(n-2)为:');
        t_value = nctinv(a,n-2,0)
        addvalue = t_value*sqrt(sigma_2)*sqrt(1+1/n+(x-Expection(X,1))^2/(X*X'-n*Expection(X,1)^2));
        disp('置信区间上限的方程为:');
        vpa(alpha+beta*x+addvalue,4)
        disp('置信区间下限的方程为:');
        vpa(alpha+beta*x-addvalue,4)
        y_ub = subs(alpha+beta*x+addvalue,x_v);
        y_lb = subs(alpha+beta*x-addvalue,x_v);
        plot(x_v,y_ub,'--',x_v,y_lb,'-.','color',[0.2 0.5 0.8]);
        title('OLS法对数据的线性回归');
        legend('差值','Y:实际值','y:拟合直线','平均值','置信上限','置信下限');
        hold off
        yyaxis left
        text(min(X)-Expection(X,1)/sqrt(n),max(delta_Y)-Expection(delta_Y,1)/sqrt(n),['置信程度',num2str(100*a),'%'],'color',[0.3 0.3 0.3]);
        hold off
    else
        legend('差值','Y:实际值','y:拟合直线','平均值');
        title('OLS法对数据的线性回归');
        hold off
    end
    disp('是否输出回归系数的置信区间?');
    str = input('yes or no?','s');
    if strcmp(str,'yes') == 1
        str =  input('单侧置信系数 或者 双侧置信系数?','s');
        if strcmp(str(1),'单') == 1
            ALPHA = input('输入单侧置信系数:');
            a = 1-ALPHA*2;
        elseif strcmp(str(1),'双') == 1
            ALPHA = input('输入双侧置信系数:');
            a = 1-ALPHA;
        end
        disp('t(n-2)为:');
        t_value = nctinv(a,n-2,0)
        disp('回归系数beta的置信区间为:');
        [beta-t_value*sqrt(Var_beta),beta+t_value*sqrt(Var_beta)]
        disp('回归系数alpha的置信区间为:');
        [Expection(Y,1)-(beta+t_value*sqrt(Var_beta))*Expection(X,1),...
            Expection(Y,1)-(beta-t_value*sqrt(Var_beta))*Expection(X,1)]
    end
    %%预测值输出
    disp('是否需要求预测值?');
    str = input('yes or no?','s');
    if strcmp(str,'yes') == 1
        X0 = input('输入预测向量:');
        str =  input('单侧置信系数 或者 双侧置信系数?','s');
        if strcmp(str(1),'单') == 1
            ALPHA = input('输入单侧置信系数:');
            a = 1-ALPHA*2;
        elseif strcmp(str(1),'双') == 1
            ALPHA = input('输入双侧置信系数:');
            a = 1-ALPHA;
        end
        disp('t(n-2)为:');
        t_value = nctinv(a,n-2,0)
        addvalue0 = t_value*sqrt(sigma_2).*sqrt(1+1/n+(X0-Expection(X,1)).^2/(X*X'-n*Expection(X,1)^2));
        Y0 = alpha+beta*X0;
        prodict{1,1} = {'预测点x'};
        prodict{2,1} = {'预测值y'};
        prodict{3,1} = {'置信区间'};
        for k = 2:max(size(X0))+1
            interval(k-1,:) = [Y0(k-1)-addvalue0(k-1),Y0(k-1)+addvalue0(k-1)];
            prodict{1,k} = X0(k-1);
            prodict{2,k} = Y0(k-1);
            prodict{3,k} = interval(k-1,:);
            Y0_lb(k-1) = Y0(k-1)-addvalue0(k-1);
            Y0_ub(k-1) = Y0(k-1)+addvalue0(k-1);
        end
        h2 = figure(2);
        set(h2,'color','w');
        plot(X0,Y0,'r-*',X0,Y0_ub,'g-o',X0,Y0_lb,'b-o');hold on;
        errorbar(X0,Y0,addvalue0,'color',[0.8 0.5 0.2]);
        xlabel('X');ylabel('y');
        grid on;
        title('点预测');
        legend('预测值','置信上限','置信下限','置信限');
        Y0_dis = (max(Y0)-min(Y0))/40;
        Y0_lb_dis = (max(Y0_lb)-min(Y0_lb))/40;
        Y0_ub_dis = (max(Y0_ub)-min(Y0_ub))/40;
        disp('是否显示数据?');
        str = input('yes or no?','s');
        if strcmp(str ,'yes') == 1
            if max(size(X0)) <= 3
                for k = 1:max(size(X0))
                    text(X0(k),Y0(k)+Y0_dis,['(',num2str(X0(k)),',',num2str(Y0(k)),')'],...
                        'color',[0.2 0.2 0.2]);hold on;
                    text(X0(k),Y0_lb(k)+Y0_lb_dis,['(',num2str(X0(k)),',',num2str(Y0_lb(k)),')'],...
                        'color',[0.2 0.2 0.2],'VerticalAlignment','middle','HorizontalAlignment','center');hold on;
                    text(X0(k),Y0_ub(k)+Y0_ub_dis,['(',num2str(X0(k)),',',num2str(Y0_ub(k)),')'],...
                        'color',[0.2 0.2 0.2],'VerticalAlignment','middle','HorizontalAlignment','center');hold on;
                end
            end
        end
        text(Expection(X0,1),(max(Y0)+max(Y0_ub))/2,['置信程度',num2str(100*a),'%'],'color',[0.3 0.3 0.3]);
    else
        prodict = [];
    end
    disp('回归方程为:');
    vpa(alpha+beta*x,4)
    disp('相关系数平方为:');
    vpa(R2,6)
    disp('随机项方差的无偏估计为:');
    vpa(sigma_2,6)
    disp('回归系数beta和alpha的方差分别为:');
    vpa(Var_beta,6)
    vpa(Var_alpha,6)
    disp('回归系数beta和alpha的协方差为:');
    vpa(cov_alpha_beta,6)
    %%SST,SSE和SSR
    disp('SST:');
    SST = YY_s_m
    Y_OLS = reshape(alpha+beta*X',1,n);
    disp('SSR:');
    SSR = (Y_OLS-Expection(Y,1))*(Y_OLS-Expection(Y,1))'
    disp('SSE:');
    SSE = (Y-Y_OLS)*(Y-Y_OLS)'
    
        function En = Expection(M,n)                                          %%n阶原点矩%%
            En = sum(M.^n)/size(M,2);
        end
    end
    

      2.下面以一个例子来说明程序运行结果

    clear all
    clc
    X = [-2.269 -1.248 -0.269 1.248 2.006 4.598 5.264 7.002];
    Y = [4582 4027 4958 5078 6072 4156 5948 5027];
    S = Linear_Regression(X,Y)
    
    是否标出差值?
    yes or no?yes
    是否标出数据点?
    yes or no?no
    是否输出回归方程的置信区间?
    yes or no?yes
    单侧置信系数 或者 双侧置信系数?双
    输入双侧置信系数:0.05
    t(n-2)为:
    t_value =
        1.9432
    置信区间上限的方程为:
    ans =
    78.43*x + 1466.0*(0.013*(x - 2.041)^2 + 1.125)^(1/2) + 4821.0
    置信区间下限的方程为:
    ans =
    78.43*x - 1466.0*(0.013*(x - 2.041)^2 + 1.125)^(1/2) + 4821.0
    是否输出回归系数的置信区间?
    yes or no?yes
    单侧置信系数 或者 双侧置信系数?双
    输入双侧置信系数:0.05
    t(n-2)为:
    t_value =
        1.9432
    回归系数beta的置信区间为:
    ans =
      -88.7367  245.5885
    回归系数alpha的置信区间为:
    ans =
       1.0e+03 *
        4.4796    5.1622
    是否需要求预测值?
    yes or no?yes
    输入预测向量:[-2.156 -1.306 0.248 1.296]
    单侧置信系数 或者 双侧置信系数?双
    输入双侧置信系数:0.05
    t(n-2)为:
    t_value =
        1.9432
    是否显示数据?
    yes or no?no
    回归方程为:
    ans =
    78.43*x + 4821.0
    相关系数平方为:
    ans =
    0.121668
    随机项方差的无偏估计为:
    ans =
    569067.0
    回归系数beta和alpha的方差分别为:
    ans =
    7400.35
    ans =
    101976.0
    回归系数beta和alpha的协方差为:
    ans =
    -15107.8
    SST:
    SST =
         3887366
    SSR:
    SSR =
       4.7297e+05
    SSE:
    SSE =
       3.4144e+06
    OLS =
      10×5 cell 数组
      列 1 至 3
        {'拟合斜率beta'   }    {'拟合截距alpha'  }    {'相关系数R^2'       }
        {[       78.4259]}    {[    4.8209e+03]}    {[           0.1217]}
        {'EX^2'          }    {'EY^2'          }    {'EXY'              }
        {[       13.7799]}    {[    2.5296e+07]}    {[       1.0923e+04]}
        {'min(X)'        }    {'min(Y)'        }    {'cov(X,Y)'         }
        {[       -2.2690]}    {[          4027]}    {[         753.8428]}
        {'样本标准差SD(X)'}    {'样本标准差SD(Y)'}    {'总体标准差sigma(X)'}
        {[        3.3144]}    {[      745.2100]}    {[           9.6122]}
        {'变异系数CV(Y)'  }    {'X极差'         }    {'Y极差'            }
        {[        0.1496]}    {[        9.2710]}    {[             2045]}
      列 4 至 5
        {'X平均值'           }    {'Y平均值'       }
        {[           2.0415]}    {[         4981]}
        {'max(X)'           }    {'max(Y)'       }
        {[           7.0020]}    {[         6072]}
        {'X绝对值几何均值'    }    {'Y绝对值几何均值'}
        {[           2.0591]}    {[   4.9328e+03]}
        {'总体标准差sigma(Y)'}    {'变异系数CV(X)' }
        {[       4.8592e+05]}    {[       1.6235]}
        {'n*E(Y-Y_ba)'      }    {'SSR'          }
        {[      -3.1238e+06]}    {[   1.2431e+12]}
    prodict =
      3×5 cell 数组
      列 1 至 4
        {1×1 cell}    {[   -2.1560]}    {[   -1.3060]}    {[    0.2480]}
        {1×1 cell}    {[4.6518e+03]}    {[4.7185e+03]}    {[4.8403e+03]}
        {1×1 cell}    {1×2 double  }    {1×2 double  }    {1×2 double  }
      列 5
        {[    1.2960]}
        {[4.9225e+03]}
        {1×2 double  }
    >> 
    

      

      

  • 相关阅读:
    密码验证合格程序(Python)
    Python找到所有子集
    Semi-Supervised Classification with Graph Convolutional Networks 阅读笔记
    2018 ICPC南京网络赛 L Magical Girl Haze 题解
    2018 CCPC网络赛 hdu6444 Neko's loop
    2018 CCPC 网络赛 Buy and Resell
    实对称矩阵可对角化证明
    矩阵的极分解证明
    关于欧几里得空间上的仿射变换的直观几何理解
    Codeforces Hello 2018 E题Logical Expression dp+最短路 好题
  • 原文地址:https://www.cnblogs.com/guliangt/p/12202621.html
Copyright © 2011-2022 走看看