zoukankan      html  css  js  c++  java
  • R语言实战

    8. 数据排序

    > leadership$age
    [1] 32 45 25 39 NA
    > newdata <- leadership[order(leadership$age),]
    > newdata
      manager   testDate country gender age item1 item2 item3 item4 item5
    3       3 2008-10-01      UK      F  25     3     5     5     5     2
    1       1 2008-10-24      US      M  32     5     4     5     5     5
    4       4 2008-10-12      UK      M  39     3     3     4    NA    NA
    2       2 2008-10-28      US      F  45     3     5     2     5     5
    5       5 2009-05-01      UK      F  NA     2     2     1     2     1
      stringAsFactors agecat
    3           FALSE  Young
    1           FALSE  Young
    4           FALSE  Young
    2           FALSE  Young
    5           FALSE   <NA>
    > 
    > 
    > attach(leadership)
    The following objects are masked _by_ .GlobalEnv:
    
        age, country, gender, manager
    
    > newdata <- leadership[order(gender, age),]
    > detach(leadership)
    > newdata
      manager   testDate country gender age item1 item2 item3 item4 item5
    3       3 2008-10-01      UK      F  25     3     5     5     5     2
    2       2 2008-10-28      US      F  45     3     5     2     5     5
    5       5 2009-05-01      UK      F  NA     2     2     1     2     1
    1       1 2008-10-24      US      M  32     5     4     5     5     5
    4       4 2008-10-12      UK      M  39     3     3     4    NA    NA
      stringAsFactors agecat
    3           FALSE  Young
    2           FALSE  Young
    5           FALSE   <NA>
    1           FALSE  Young
    4           FALSE  Young
    > 
    > attach(leadership)
    The following objects are masked _by_ .GlobalEnv:
    
        age, country, gender, manager
    
    > newdata <- leadership[order(gender, -age),]
    > detach(leadership)
    > newdata
      manager   testDate country gender age item1 item2 item3 item4 item5
    5       5 2009-05-01      UK      F  NA     2     2     1     2     1
    2       2 2008-10-28      US      F  45     3     5     2     5     5
    3       3 2008-10-01      UK      F  25     3     5     5     5     2
    4       4 2008-10-12      UK      M  39     3     3     4    NA    NA
    1       1 2008-10-24      US      M  32     5     4     5     5     5
      stringAsFactors agecat
    5           FALSE   <NA>
    2           FALSE  Young
    3           FALSE  Young
    4           FALSE  Young
    1           FALSE  Young
    > 
    

    9. 数据集的合并

    9.1 添加列

    > patientID <- c(1, 2, 3, 4)
    > age <- c(25, 34, 28, 52)
    > status <- c("poor", "improved", "excellent", "poor")
    > gender <- c("F", "M", "M", "F")
    > dataframeA <- data.frame(patientID, gender)
    > dataframeA
      patientID gender
    1         1      F
    2         2      M
    3         3      M
    4         4      F
    > dataframeB <- data.frame(patientID, age, status)
    > dataframeB
      patientID age    status
    1         1  25      poor
    2         2  34  improved
    3         3  28 excellent
    4         4  52      poor
    > total <- merge(dataframeA, dataframeB, by="ID")
    Error in fix.by(by.x, x) : 'by' must specify a uniquely valid column
    > total <- merge(dataframeA, dataframeB, by="patientID")
    > total
      patientID gender age    status
    1         1      F  25      poor
    2         2      M  34  improved
    3         3      M  28 excellent
    4         4      F  52      poor
    > total <- merge(dataframeA, dataframeB, by=c("gender", "age"))
    Error in fix.by(by.x, x) : 'by' must specify a uniquely valid column
    > total <- merge(dataframeA, dataframeB, by=c("patientID", "age"))
    Error in fix.by(by.x, x) : 'by' must specify a uniquely valid column
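    > # 说明: by 中列出的每一列都必须同时存在于两个数据框中。gender 只在 dataframeA 中,
    > # age 只在 dataframeB 中, 两者唯一的公共列是 patientID, 所以上面两次 merge 都报错。
    > 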
    > total <- cbind(dataframeA, dataframeB)
    > total
      patientID gender patientID age    status
    1         1      F         1  25      poor
    2         2      M         2  34  improved
    3         3      M         3  28 excellent
    4         4      F         4  52      poor
    > 
    

    9.2 添加行

    > # 说明: rbind() 要求两个数据框拥有相同的变量; dataframeA 有 2 列而 dataframeB 有 3 列, 因此报错。
    > total <- rbind(dataframeA, dataframeB)
    Error in rbind(deparse.level, ...) : 
      numbers of columns of arguments do not match
    

    10. 数据集取子集

    10.1 选入(保留)变量

    > newdata <- leadership[, c(6:10)]
    > newdata
      item1 item2 item3 item4 item5
    1     5     4     5     5     5
    2     3     5     2     5     5
    3     3     5     5     5     2
    4     3     3     4    NA    NA
    5     2     2     1     2     1
    > 
    > 
    > myvars <- c("item1","item2","item3","item4","item5")
    > newdata <- leadership[myvars]
    > newdata
      item1 item2 item3 item4 item5
    1     5     4     5     5     5
    2     3     5     2     5     5
    3     3     5     5     5     2
    4     3     3     4    NA    NA
    5     2     2     1     2     1
    > 
    > 
    > # 注意: 下面把 sep 误写成了 seq。seq="" 被当作又一个待拼接的空字符串, 分隔符仍是默认的空格。
    > myvar <- paste("item", 1:5, seq="")
    > myvar
    [1] "item 1 " "item 2 " "item 3 " "item 4 " "item 5 "
    > myvar <- paste("item", 1:5, sep="")
    > myvar
    [1] "item1" "item2" "item3" "item4" "item5"
    > newdata <- leadership[myvar]
    > newdata
      item1 item2 item3 item4 item5
    1     5     4     5     5     5
    2     3     5     2     5     5
    3     3     5     5     5     2
    4     3     3     4    NA    NA
    5     2     2     1     2     1
    > 

    10.2 剔除(丢弃)变量

    > myvars <- names(leadership) %in% c("item3", "item4")
    > myvars
     [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE
    > newdata <- leadership[!myvars]
    > newdata
      manager   testDate country gender age item1 item2 item5 stringAsFactors
    1       1 2008-10-24      US      M  32     5     4     5           FALSE
    2       2 2008-10-28      US      F  45     3     5     5           FALSE
    3       3 2008-10-01      UK      F  25     3     5     2           FALSE
    4       4 2008-10-12      UK      M  39     3     3    NA           FALSE
    5       5 2009-05-01      UK      F  NA     2     2     1           FALSE
      agecat
    1  Young
    2  Young
    3  Young
    4  Young
    5   <NA>
    > 
    > 
    > names(leadership)
     [1] "manager"         "testDate"        "country"         "gender"         
     [5] "age"             "item1"           "item2"           "item3"          
     [9] "item4"           "item5"           "stringAsFactors" "agecat"         
    > 
    > newdata <- leadership[c(-8,-9)]
    > newdata
      manager   testDate country gender age item1 item2 item5 stringAsFactors
    1       1 2008-10-24      US      M  32     5     4     5           FALSE
    2       2 2008-10-28      US      F  45     3     5     5           FALSE
    3       3 2008-10-01      UK      F  25     3     5     2           FALSE
    4       4 2008-10-12      UK      M  39     3     3    NA           FALSE
    5       5 2009-05-01      UK      F  NA     2     2     1           FALSE
      agecat
    1  Young
    2  Young
    3  Young
    4  Young
    5   <NA>
    > leadership$item3 <- leadership$item4 <- NULL
    > leadership
      manager   testDate country gender age item1 item2 item5 stringAsFactors
    1       1 2008-10-24      US      M  32     5     4     5           FALSE
    2       2 2008-10-28      US      F  45     3     5     5           FALSE
    3       3 2008-10-01      UK      F  25     3     5     2           FALSE
    4       4 2008-10-12      UK      M  39     3     3    NA           FALSE
    5       5 2009-05-01      UK      F  NA     2     2     1           FALSE
      agecat
    1  Young
    2  Young
    3  Young
    4  Young
    5   <NA>
    > 

    10.3 选入观测

    > newdata <- leadership[1:3,]
    > newdata
      manager   testDate country gender age item1 item2 item5 stringAsFactors
    1       1 2008-10-24      US      M  32     5     4     5           FALSE
    2       2 2008-10-28      US      F  45     3     5     5           FALSE
    3       3 2008-10-01      UK      F  25     3     5     2           FALSE
      agecat
    1  Young
    2  Young
    3  Young
    > newdata <- leadership[which(leadership$gender=="M" & leadership$age > 30),]
    > newdata
      manager   testDate country gender age item1 item2 item5 stringAsFactors
    1       1 2008-10-24      US      M  32     5     4     5           FALSE
    4       4 2008-10-12      UK      M  39     3     3    NA           FALSE
      agecat
    1  Young
    4  Young
    > attach(leadership)
    The following objects are masked _by_ .GlobalEnv:
    
        age, country, gender, manager
    
    > newdata1 <- leadership[which(gender=='M' & age > 30),]
    > detach(leadership)
    > newdata1
      manager   testDate country gender age item1 item2 item5 stringAsFactors
    2       2 2008-10-28      US      F  45     3     5     5           FALSE
      agecat
    2  Young
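    > # 注意: 上面的结果是错误的(第 2 行是 F, 不是 M)。attach() 已提示 age 和 gender
    > # 被 .GlobalEnv 中的同名对象屏蔽, 因此 which() 用的是 9.1 节中定义的长度为 4 的
    > # 全局向量, 而不是 leadership 的列。应改用 leadership$gender / leadership$age、
    > # with() 或 subset(), 避免使用 attach()。
    > 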
    
    > leadership$date <- as.Date(leadership$date, "%m/%d/%y")
    Error in as.Date.default(leadership$date, "%m/%d/%y") : 
      do not know how to convert 'leadership$date' to class “Date”
    > leadership$testDate <- as.Date(leadership$testDate, "%m/%d/%y")
    > startdate <- as.Date("2009-01-01")
    > enddate <- as.Date("2009-10-31")
    > newdate <- leadership[which(leadership$testDate >= startdate & leadership$testDate <= enddate),]
    > newdate
      manager   testDate country gender age item1 item2 item5 stringAsFactors
    5       5 2009-05-01      UK      F  NA     2     2     1           FALSE
      agecat
    5   <NA>
    > 

    10.4 subset() 函数

    > leadership
      manager   testDate country gender age item1 item2 item5 stringAsFactors
    1       1 2008-10-24      US      M  32     5     4     5           FALSE
    2       2 2008-10-28      US      F  45     3     5     5           FALSE
    3       3 2008-10-01      UK      F  25     3     5     2           FALSE
    4       4 2008-10-12      UK      M  39     3     3    NA           FALSE
    5       5 2009-05-01      UK      F  NA     2     2     1           FALSE
      agecat
    1  Young
    2  Young
    3  Young
    4  Young
    5   <NA>
    > newdata <- subset(leadership, age >= 35 | age < 24, select=c(item1, item2, item5))
    > newdata
      item1 item2 item5
    2     3     5     5
    4     3     3    NA
    > 
    > newdata <- subset(leadership, gender=="M" & age > 25, select=gender:item5)
    > newdata
      gender age item1 item2 item5
    1      M  32     5     4     5
    4      M  39     3     3    NA
    > 

    10.5 随机抽样

    > leadership
      manager   testDate country gender age item1 item2 item5 stringAsFactors
    1       1 2008-10-24      US      M  32     5     4     5           FALSE
    2       2 2008-10-28      US      F  45     3     5     5           FALSE
    3       3 2008-10-01      UK      F  25     3     5     2           FALSE
    4       4 2008-10-12      UK      M  39     3     3    NA           FALSE
    5       5 2009-05-01      UK      F  NA     2     2     1           FALSE
      agecat
    1  Young
    2  Young
    3  Young
    4  Young
    5   <NA>
    > 
    > mysample <- leadership[sample(1:nrow(leadership), 3, replace=FALSE),]
    > mysample
      manager   testDate country gender age item1 item2 item5 stringAsFactors
    4       4 2008-10-12      UK      M  39     3     3    NA           FALSE
    2       2 2008-10-28      US      F  45     3     5     5           FALSE
    1       1 2008-10-24      US      M  32     5     4     5           FALSE
      agecat
    4  Young
    2  Young
    1  Young
    > 
  • 相关阅读:
    JS调用App方法及App调用JS方法
    提升用户体验之 选用合适的鼠标光标
    js仿QQ拖拽删除
    Linux下安装 mongodb
    QQ分享-定制分享卡片
    js判断浏览器语言实现网站国际化
    js复制内容到剪切板
    为什么会有堆内存和栈内存之分
    Avro实现RPC
    zookeeper学习day01
  • 原文地址:https://www.cnblogs.com/wnzhong/p/7496188.html
Copyright © 2011-2022 走看看