zoukankan      html  css  js  c++  java
  • R语言实战

    > vars <- c("mpg", "hp", "wt")
    > head(mtcars[vars])
                       mpg  hp    wt
    Mazda RX4         21.0 110 2.620
    Mazda RX4 Wag     21.0 110 2.875
    Datsun 710        22.8  93 2.320
    Hornet 4 Drive    21.4 110 3.215
    Hornet Sportabout 18.7 175 3.440
    Valiant           18.1 105 3.460
    > 
    

    1. 方法云集

    > summary(mtcars[vars])
          mpg              hp              wt       
     Min.   :10.40   Min.   : 52.0   Min.   :1.513  
     1st Qu.:15.43   1st Qu.: 96.5   1st Qu.:2.581  
     Median :19.20   Median :123.0   Median :3.325  
     Mean   :20.09   Mean   :146.7   Mean   :3.217  
     3rd Qu.:22.80   3rd Qu.:180.0   3rd Qu.:3.610  
     Max.   :33.90   Max.   :335.0   Max.   :5.424 
    
    > mystats <- function(x, na.omit=FALSE){
    +     if (na.omit)
    +         x <- x[!is.na(x)]
    +     m <- mean(x)
    +     n <- length(x)
    +     s <- sd(x)
    +     skew <- sum((x-m)^3/s^3)/n
    +     kurt <- sum((x-m)^4/s^4)/n-3
    +     return(c(n=n, mean=m, stdev=s, skew=skew, kurtosis=kurt))
    + }
    > sapply(mtcars[vars], mystats)
                   mpg          hp          wt
    n        32.000000  32.0000000 32.00000000
    mean     20.090625 146.6875000  3.21725000
    stdev     6.026948  68.5628685  0.97845744
    skew      0.610655   0.7260237  0.42314646
    kurtosis -0.372766  -0.1355511 -0.02271075
    > 
    

    mpg的平均值为20.1,标准差为6.0;分布呈右偏(偏度为+0.61),且比正态分布稍平(峰度为-0.37)。

    Hmisc 包安装失败

    1)通过Hmisc包中的describe()函数计算描述性统计量

    2)通过pastecs包中的stat.desc()函数计算描述性统计量

    > vars <- c("mpg", "hp", "wt")
    > library(pastecs)
    > stat.desc(mtcars[vars])
                         mpg           hp          wt
    nbr.val       32.0000000   32.0000000  32.0000000
    nbr.null       0.0000000    0.0000000   0.0000000
    nbr.na         0.0000000    0.0000000   0.0000000
    min           10.4000000   52.0000000   1.5130000
    max           33.9000000  335.0000000   5.4240000
    range         23.5000000  283.0000000   3.9110000
    sum          642.9000000 4694.0000000 102.9520000
    median        19.2000000  123.0000000   3.3250000
    mean          20.0906250  146.6875000   3.2172500
    SE.mean        1.0654240   12.1203173   0.1729685
    CI.mean.0.95   2.1729465   24.7195501   0.3527715
    var           36.3241028 4700.8669355   0.9573790
    std.dev        6.0269481   68.5628685   0.9784574
    coef.var       0.2999881    0.4674077   0.3041285
    

    psych包中的describe()函数可以计算非缺失值的数量、平均数、标准差、中位数、截尾均值、绝对中位差、最小值、最大值、全距、偏度、峰度和平均值的标准误。

    3)通过psych包中的describe()函数计算描述性统计量

    > library(psych)
    > describe(mtcars[vars])
        vars  n   mean    sd median trimmed   mad   min    max  range skew kurtosis    se
    mpg    1 32  20.09  6.03  19.20   19.70  5.41 10.40  33.90  23.50 0.61    -0.37  1.07
    hp     2 32 146.69 68.56 123.00  141.19 77.10 52.00 335.00 283.00 0.73    -0.14 12.12
    wt     3 32   3.22  0.98   3.33    3.15  0.77  1.51   5.42   3.91 0.42    -0.02  0.17
    

    2. 分组计算描述性统计量

    > aggregate(mtcars[vars], by=list(am=mtcars$am), mean)
      am      mpg       hp       wt
    1  0 17.14737 160.2632 3.768895
    2  1 24.39231 126.8462 2.411000
    > aggregate(mtcars[vars], by=list(am=mtcars$am), sd)
      am      mpg       hp        wt
    1  0 3.833966 53.90820 0.7774001
    2  1 6.166504 84.06232 0.6169816
    > 
    

      

    使用by()分组计算描述性统计量(失败)

    doBy包安装失败

    使用psych包中的describe.by()分组计算概述统计量(注意:describe.by()已被弃用,应改用describeBy(),参见下方输出末尾的警告信息)

    > library(psych)
    > describe.by(mtcars[vars], mtcars$am)
    
     Descriptive statistics by group 
    group: 0
        vars  n   mean    sd median trimmed   mad   min    max  range  skew kurtosis    se
    mpg    1 19  17.15  3.83  17.30   17.12  3.11 10.40  24.40  14.00  0.01    -0.80  0.88
    hp     2 19 160.26 53.91 175.00  161.06 77.10 62.00 245.00 183.00 -0.01    -1.21 12.37
    wt     3 19   3.77  0.78   3.52    3.75  0.45  2.46   5.42   2.96  0.98     0.14  0.18
    -------------------------------------------------------------------- 
    group: 1
        vars  n   mean    sd median trimmed   mad   min    max  range skew kurtosis    se
    mpg    1 13  24.39  6.17  22.80   24.38  6.67 15.00  33.90  18.90 0.05    -1.46  1.71
    hp     2 13 126.85 84.06 109.00  114.73 63.75 52.00 335.00 283.00 1.36     0.56 23.31
    wt     3 13   2.41  0.62   2.32    2.39  0.68  1.51   3.57   2.06 0.21    -1.17  0.17
    Warning message:
    describe.by is deprecated.  Please use the describeBy function 
    > 
    

     

    通过reshape包分组计算概述统计量

    > library(reshape)
    > dstats <- function(x)(c(n=length(x), mean=mean(x), sd=sd(x)))
    > dfm <- melt(mtcars, measure.vars=("mpg","hp","wt"), id.vars=c("am","cyl"))
    Error: unexpected ',' in "dfm <- melt(mtcars, measure.vars=("mpg","
    > dfm <- melt(mtcars, measure.vars=c("mpg","hp","wt"), id.vars=c("am","cyl"))
    > cast(dfm, am+cyl+variable~.,dstats)
       am cyl variable  n       mean         sd
    1   0   4      mpg  3  22.900000  1.4525839
    2   0   4       hp  3  84.666667 19.6553640
    3   0   4       wt  3   2.935000  0.4075230
    4   0   6      mpg  4  19.125000  1.6317169
    5   0   6       hp  4 115.250000  9.1787799
    6   0   6       wt  4   3.388750  0.1162164
    7   0   8      mpg 12  15.050000  2.7743959
    8   0   8       hp 12 194.166667 33.3598379
    9   0   8       wt 12   4.104083  0.7683069
    10  1   4      mpg  8  28.075000  4.4838599
    11  1   4       hp  8  81.875000 22.6554156
    12  1   4       wt  8   2.042250  0.4093485
    13  1   6      mpg  3  20.566667  0.7505553
    14  1   6       hp  3 131.666667 37.5277675
    15  1   6       wt  3   2.755000  0.1281601
    16  1   8      mpg  2  15.400000  0.5656854
    17  1   8       hp  2 299.500000 50.2045815
    18  1   8       wt  2   3.370000  0.2828427
    

      

    3. 结果的可视化

  • 相关阅读:
    基于PHP的正则表达式
    学习笔记---C/C++语法
    Stack的c实现
    回忆过去的两年
    学习笔记---计算机组成
    The shortest path---hdu2224 && Tour---poj2677(旅行商问题)
    Ubantu Linux 环境下编译c++程序
    Quoit Design---hdu1007(最近点对问题 分治法)
    Wrestling Match---hdu5971(2016CCPC大连 染色法判断是否是二分图)
    异或密码---hdu5968(CCPC合肥,二分)
  • 原文地址:https://www.cnblogs.com/wnzhong/p/7600691.html
Copyright © 2011-2022 走看看