zoukankan      html  css  js  c++  java
  • R语言与概率统计(三) 多元统计分析(上)

    > #############6.2一元线性回归分析
    > x<-c(0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.20,0.21,0.23)
    > y<-c(42.0,43.5,45.0,45.5,45.0,47.5,49.0,53.0,50.0,55.0,55.0,60.0)
    > plot(x~y)
    > lm.sol<-lm(y ~ x)
    > summary(lm.sol)
    
    Call:
    lm(formula = y ~ x)
    
    Residuals:
        Min      1Q  Median      3Q     Max 
    -2.0431 -0.7056  0.1694  0.6633  2.2653 
    
    Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
    (Intercept)   28.493      1.580   18.04 5.88e-09 ***
    x            130.835      9.683   13.51 9.50e-08 ***    #所以y=130.835x+28.493,***表示显著性水平,*越多越好
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1    #显著性水平
    
    Residual standard error: 1.319 on 10 degrees of freedom
    Multiple R-squared:  0.9481,	Adjusted R-squared:  0.9429   
    F-statistic: 182.6 on 1 and 10 DF,  p-value: 9.505e-08    #F检验,检验所有回归系数(不含截距)全为0的假设
    
    > new=data.frame(x=0.16)#怎么预测多个数值的结果?
    > lm.pred=predict(lm.sol,new,interval='prediction',level=0.95)
    > lm.pred
           fit      lwr      upr
    1 49.42639 46.36621 52.48657
    

    log100 列的构造:先对 h 取以 10 为底的对数(即 log 列),再乘以 100。

    > X<-matrix(c(
    +   194.5, 20.79, 1.3179, 131.79,
    +   194.3, 20.79, 1.3179, 131.79,
    +   197.9, 22.40, 1.3502, 135.02,
    +   198.4, 22.67, 1.3555, 135.55,
    +   199.4, 23.15, 1.3646, 136.46,
    +   199.9, 23.35, 1.3683, 136.83,
    +   200.9, 23.89, 1.3782, 137.82,
    +   201.1, 23.99, 1.3800, 138.00,
    +   201.4, 24.02, 1.3806, 138.06,
    +   201.3, 24.01, 1.3805, 138.05,
    +   203.6, 25.14, 1.4004, 140.04,
    +   204.6, 26.57, 1.4244, 142.44,
    +   209.5, 28.49, 1.4547, 145.47,
    +   208.6, 27.76, 1.4434, 144.34,
    +   210.7, 29.04, 1.4630, 146.30,
    +   211.9, 29.88, 1.4754, 147.54,
    +   212.2, 30.06, 1.4780, 147.80),
    +   ncol=4, byrow=T,
    +   dimnames = list(1:17, c("F", "h", "log", "log100")))#如何改变行和列的名称,如何按列排列数据?
    > 
    > forbes<-data.frame(X)#把矩阵X转化为数据框
    > plot(forbes$F, forbes$log100)#画出两个变量之间的散点图,观察是否存在线性趋势;学习
    > #如何从数据框里面调取向量。怎么写坐标轴的名字和标题?
    > lm.sol<-lm(log100~F, data=forbes)
    > summary(lm.sol)
    
    Call:
    lm(formula = log100 ~ F, data = forbes)
    
    Residuals:
         Min       1Q   Median       3Q      Max 
    -0.32261 -0.14530 -0.06750  0.02111  1.35924 
    
    Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
    (Intercept) -42.13087    3.33895  -12.62 2.17e-09 ***
    F             0.89546    0.01645   54.45  < 2e-16 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Residual standard error: 0.3789 on 15 degrees of freedom
    Multiple R-squared:  0.995,	Adjusted R-squared:  0.9946 
    F-statistic:  2965 on 1 and 15 DF,  p-value: < 2.2e-16
    
    > abline(lm.sol)#在散点图上添加直线

    # Residual check
    # Pull the residuals out of the fitted model and plot them by observation index.
    y.res <- residuals(lm.sol)
    plot(y.res)
    # Put the label "12" next to observation 12 on the residual plot.
    text(12, y.res[12], labels = 12, adj = 1.2)
    

    # Outlier detection
    library(car)
    outlierTest(lm.sol)
    
    > outlierTest(lm.sol)
       rstudent unadjusted p-value Bonferroni p
    12 12.40369         6.1097e-09   1.0386e-07
    
    > plot(lm.sol)
    Hit <Return> to see next plot: return
    Hit <Return> to see next plot: return
    Hit <Return> to see next plot: return
    Hit <Return> to see next plot: return
    

     

    # ---- 6.6 Multiple regression ----
    # 13 observations of three numeric variables: X1, X2 and Y.
    blood <- data.frame(
      X1 = c(76.0, 91.5, 85.5, 82.5, 79.0, 80.5, 74.5, 79.0, 85.0, 76.5,
             82.0, 95.0, 92.5),
      X2 = c(50, 20, 20, 30, 30, 50, 60, 50, 40, 55, 40, 40, 20),
      Y = c(120, 141, 124, 126, 117, 125, 123, 125, 132, 123, 132, 155, 147)
    )
    
    # Before fitting a multiple regression, inspect how the variables correlate.
    cor(blood)
    library(car)
    # Smoother settings are passed as a list through `smooth` (car >= 3.0);
    # `spread` and `lty.smooth` given as top-level arguments are the older
    # calling convention. FALSE is spelled out instead of the reassignable F.
    scatterplotMatrix(blood, smooth = list(spread = FALSE, lty.smooth = 2),
                      main = "blood plot matrix")
    

      

    > lm.sol<-lm(Y ~ X1+X2, data=blood)
    > summary(lm.sol)
    
    Call:
    lm(formula = Y ~ X1 + X2, data = blood)
    
    Residuals:
        Min      1Q  Median      3Q     Max 
    -4.0404 -1.0183  0.4640  0.6908  4.3274 
    
    Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
    (Intercept) -62.96336   16.99976  -3.704 0.004083 ** 
    X1            2.13656    0.17534  12.185 2.53e-07 ***
    X2            0.40022    0.08321   4.810 0.000713 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Residual standard error: 2.854 on 10 degrees of freedom
    Multiple R-squared:  0.9461,	Adjusted R-squared:  0.9354 
    F-statistic: 87.84 on 2 and 10 DF,  p-value: 4.531e-07
    
    > #回归系数的区间估计
    > confint(lm.sol)
                       2.5 %      97.5 %
    (Intercept) -100.8411862 -25.0855320
    X1             1.7458709   2.5272454
    X2             0.2148077   0.5856246
    > #6.8预测
    > new=data.frame(X1=80,X2=40)#怎么做多组预测?
    > lm.pred=predict(lm.sol,new,interval='prediction',level=0.95)
    > lm.pred
           fit      lwr      upr
    1 123.9699 117.2889 130.6509
    

     

    所有代码:

    # ---- 6.2 Simple linear regression ----
    # Paired observations: x is the predictor, y is the response.
    x <- c(0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.20, 0.21, 0.23)
    y <- c(42.0, 43.5, 45.0, 45.5, 45.0, 47.5, 49.0, 53.0, 50.0, 55.0, 55.0, 60.0)
    # Scatter plot with the response on the vertical axis so that it matches the
    # model formula below (the left-hand side of a plot formula is the y-axis).
    plot(y ~ x)
    lm.sol <- lm(y ~ x)
    summary(lm.sol)
    # 6.4 Prediction
    # `new` holds one row per value to predict; to predict several values at
    # once, give it several rows, e.g. data.frame(x = c(0.16, 0.20)).
    new <- data.frame(x = 0.16)
    lm.pred <- predict(lm.sol, new, interval = "prediction", level = 0.95)
    lm.pred
    # ---- Forbes data: regression of log100 on F ----
    # Each line below is one observation, in the column order F, h, log, log100.
    X <- matrix(c(
      194.5, 20.79, 1.3179, 131.79,
      194.3, 20.79, 1.3179, 131.79,
      197.9, 22.40, 1.3502, 135.02,
      198.4, 22.67, 1.3555, 135.55,
      199.4, 23.15, 1.3646, 136.46,
      199.9, 23.35, 1.3683, 136.83,
      200.9, 23.89, 1.3782, 137.82,
      201.1, 23.99, 1.3800, 138.00,
      201.4, 24.02, 1.3806, 138.06,
      201.3, 24.01, 1.3805, 138.05,
      203.6, 25.14, 1.4004, 140.04,
      204.6, 26.57, 1.4244, 142.44,
      209.5, 28.49, 1.4547, 145.47,
      208.6, 27.76, 1.4434, 144.34,
      210.7, 29.04, 1.4630, 146.30,
      211.9, 29.88, 1.4754, 147.54,
      212.2, 30.06, 1.4780, 147.80),
      # byrow = TRUE fills the matrix row by row (the default fills by column);
      # TRUE is spelled out because T is an ordinary, reassignable variable.
      ncol = 4, byrow = TRUE,
      # dimnames sets the row labels (1..17) and the column names.
      dimnames = list(1:17, c("F", "h", "log", "log100")))
    
    forbes <- data.frame(X)  # convert the matrix X to a data frame
    # Scatter plot of the two variables to check for a linear trend.
    # forbes$F extracts a single column of the data frame as a vector.
    # Axis names and a title can be supplied via plot()'s xlab, ylab and main.
    plot(forbes$F, forbes$log100)
    lm.sol <- lm(log100 ~ F, data = forbes)
    summary(lm.sol)
    abline(lm.sol)  # add the fitted line to the scatter plot
    
    # Residual check
    # Pull the residuals out of the fitted model and plot them by observation index.
    y.res <- residuals(lm.sol)
    plot(y.res)
    # Put the label "12" next to observation 12 on the residual plot.
    text(12, y.res[12], labels = 12, adj = 1.2)
    
    # Outlier detection
    library(car)
    outlierTest(lm.sol)
    
    # Remove the outlier
    # Drop observation 12 with a negative row index on the data frame instead of
    # building a hard-coded 1:17 index vector over the matrix X; the resulting
    # rows and columns are the same.
    forbes12 <- forbes[-12, ]
    # Refit the same model without observation 12.
    lm12 <- lm(log100 ~ F, data = forbes12)
    summary(lm12)
    
    # ---- 6.6 Multiple regression ----
    # 13 observations of three numeric variables: X1, X2 and Y.
    blood <- data.frame(
      X1 = c(76.0, 91.5, 85.5, 82.5, 79.0, 80.5, 74.5, 79.0, 85.0, 76.5,
             82.0, 95.0, 92.5),
      X2 = c(50, 20, 20, 30, 30, 50, 60, 50, 40, 55, 40, 40, 20),
      Y = c(120, 141, 124, 126, 117, 125, 123, 125, 132, 123, 132, 155, 147)
    )
    
    # Before fitting a multiple regression, inspect how the variables correlate.
    cor(blood)
    library(car)
    # Smoother settings are passed as a list through `smooth` (car >= 3.0);
    # `spread` and `lty.smooth` given as top-level arguments are the older
    # calling convention. FALSE is spelled out instead of the reassignable F.
    scatterplotMatrix(blood, smooth = list(spread = FALSE, lty.smooth = 2),
                      main = "blood plot matrix")
    
    
    # Fit Y on both X1 and X2.
    lm.sol <- lm(Y ~ X1 + X2, data = blood)
    summary(lm.sol)
    
    # Interval estimates for the regression coefficients
    confint(lm.sol)
    
    # 6.8 Prediction
    # One row of predictor values; add more rows to predict several cases at once.
    new <- data.frame(X1 = 80, X2 = 40)
    lm.pred <- predict(lm.sol, newdata = new, interval = "prediction",
                       level = 0.95)
    lm.pred
    View Code
  • 相关阅读:
    情报收集:Metasploit命令、查询网站和测试网站
    Hbase 学习笔记5----hbase region, store, storefile和列簇的关系
    Hbase 学习笔记4----原理
    Hbase 学习笔记3----操作以及维护
    Hbase 学习笔记2----概念
    Hbase 学习笔记1----shell
    Flume日志收集 总结
    Hadoop应用开发实战案例 第2周 Web日志分析项目 张丹
    2016.4.9-关于“放生”反而促进“捕猎”的思考
    Hadoop应用开发实战案例 第1周 基本介绍
  • 原文地址:https://www.cnblogs.com/caiyishuai/p/13270726.html
Copyright © 2011-2022 走看看