zoukankan      html  css  js  c++  java
  • R基础、进阶-矩阵数据框的操作

    # 1.1 Basic descriptive statistics ####
    rm(list = ls())  # start from an empty workspace
    age <- c(1, 3, 5, 2, 11, 9, 3, 9, 12, 3)
    weight <- c(4.4, 5.3, 7.2, 5.2, 8.5, 7.3, 6.0, 10.4, 10.2, 6.1)
    # Centre and spread of the weights.
    mean(weight)
    sd(weight)
    # Correlation between age and weight, followed by a scatter plot of the two.
    cor(age, weight)
    plot(age, weight)
    
    # matrix ####
    # A 4 x 5 matrix holding 1..20, filled column by column (the default).
    a <- matrix(1:20, nrow = 4, ncol = 5)
    a
    cells <- c(1, 26, 24, 68)
    rnames <- c("R1", "R2")
    cnames <- c("C1", "C2")
    # The same four cells laid out by column ...
    mymatrix1 <- matrix(
      cells,
      nrow = 2, ncol = 2, byrow = FALSE,
      dimnames = list(rnames, cnames)
    )
    mymatrix1
    # ... and by row.
    mymatrix2 <- matrix(
      cells,
      nrow = 2, ncol = 2, byrow = TRUE,
      dimnames = list(rnames, cnames)
    )
    mymatrix2
    # Indexing a 2 x 10 matrix.
    x <- matrix(1:20, nrow = 2)
    x
    x[2, ]         # second row
    x[, 3]         # third column
    x[2, 5]        # single element: row 2, column 5
    x[1, c(4, 5)]  # row 1, columns 4 and 5
    x[, c(4:7)]    # columns 4 to 7
    
    # array ####
    # Labels for each of the three dimensions of a 2 x 3 x 4 array.
    dim1 <- c("A1", "A2")
    dim2 <- c("B1", "B2", "B3")
    dim3 <- c("c1", "c2", "c3", "c4")
    mnarray <- array(
      1:24,
      c(2, 3, 4),
      dimnames = list(dim1, dim2, dim3)
    )
    View(mnarray)  # open the array in the data viewer
    
    # data frame ####
    patientID <- c(1, 2, 3, 4)
    age <- c(25, 34, 28, 52)
    diabetes <- c("TYPE1", "TYPE2", "TYPE1", "TYPE1")
    status <- c("Poor", "Improved", "Excellent", "Poor")
    patientdata <- data.frame(patientID, age, diabetes, status)
    edit(patientdata)  # temporary edit: the result is not stored anywhere
    fix(patientdata)   # edits made here are saved back into the data frame
    rownames(patientdata) <- c("a", "b", "c", "d")
    # Different ways of picking columns.
    patientdata[, 1:2]                    # matrix-style, by position
    patientdata[3:4]                      # list-style, by position
    patientdata[c("diabetes", "status")]  # by name
    patientdata$status                    # a single column as a vector
    
    # Cross-tabulation (contingency table) of patient ID against age.
    table(patientdata$patientID, patientdata$age)
    summary(patientID)
    summary(age)
    cor(age, patientID)
    cov(age, patientID)
    plot(patientdata$age, patientdata$patientID)
    # Built-in CO2 data set. with() exposes its columns to plot() without
    # attach()/detach(), so the search path is never left modified if a step
    # in between fails.
    edit(CO2)
    with(CO2, plot(uptake, conc))
    
    head(mtcars)
    # with(data, {expression}) evaluates the expression with the columns of
    # `data` in scope, which avoids writing dataname$ over and over.
    with(mtcars, {
      print(summary(mpg))
      plot(mpg, disp)
      plot(mpg, wt)
    })
    with(mtcars, print(summary(mpg)))
    # A plain <- inside with() creates an object that is gone once with()
    # returns; <<- is needed for the object to be visible afterwards.
    with(mtcars, {
      nokeepstats <- summary(mpg)
      keepstats <<- summary(mpg)
    })
    keepstats
    # nokeepstats was local to with(). Evaluating the bare name would stop the
    # script with "object 'nokeepstats' not found", so check for it instead.
    exists("nokeepstats")
    
    # Spell the argument out as `ordered`; `order =` only works through
    # partial argument matching.
    status <- factor(status, ordered = TRUE)
    diabetes <- factor(diabetes)
    class(status)
    ls(patientdata)
    patientdata <- data.frame(patientID, age, diabetes, status)
    str(patientdata)  # show the structure of the data
    summary(patientdata)

    # Ordered factor with an explicit level order instead of the alphabetical
    # default.
    status <- factor(status, ordered = TRUE,
                     levels = c("Poor", "Improved", "Excellent"))

    sex <- c("1", "2", "2", "1")
    # Levels are written as strings so they match the character codes in `sex`
    # directly rather than through numeric-to-character coercion.
    sex <- factor(sex, levels = c("1", "2"), labels = c("male", "female"))
    # list ####
    g <- "my list"
    h <- c(25, 26, 18, 39)
    j <- matrix(1:20, nrow = 2)
    k <- c("one", "two", "three")
    # The first two components are named; the matrix and the strings are not.
    mylist <- list(title = g, ages = h, j, k)
    mylist
    mylist[[3]]       # component selected by position
    mylist[["ages"]]  # component selected by name
    
    # Create new variables and recode existing ones
    manager <- c(1, 2, 3, 4, 5)
    date <- c("10/24/14", "10/28/14", "10/01/14", "10/02/14", "05/01/14")
    # NOTE(review): these look like gender codes rather than countries (the
    # ordering section of this script stores "US"/"UK" in country and "M"/"F"
    # in gender) - confirm the intended column.
    country <- c("M", "F", "F", "M", "F")
    age <- c(32, 45, 25, 39, 99)
    q1 <- c(5, 3, 3, 3, 2)
    # Second answer was 54, a typo: every other answer is on a 1-5 scale.
    q2 <- c(4, 5, 4, 3, 2)
    q3 <- c(5, 2, 5, 4, 1)
    q4 <- c(5, 5, 5, NA, 2)
    q5 <- c(5, 5, 2, NA, 1)
    leadership <- data.frame(manager, date, country, age, q1, q2, q3, q4, q5,
                             stringsAsFactors = FALSE)

    # Treat an age of 99 as missing, then bin age into categories.
    leadership$age[leadership$age == 99] <- NA
    leadership$agecat[leadership$age > 75] <- "elder"
    leadership$agecat[leadership$age <= 75 &
                        leadership$age >= 55] <- "middle aged"
    leadership$agecat[leadership$age < 55] <- "young"
    # or: the same recode with within(). The column name and labels must match
    # the ones above (this used to write "yough" into a column called ageact).
    leadership <- within(leadership, {
      agecat <- NA
      agecat[age > 75] <- "elder"
      agecat[age >= 55 & age <= 75] <- "middle aged"
      agecat[age < 55] <- "young"
    })

    # within() is handy for modifying a data frame without repeating
    # dataframe$; compare it with the with() examples earlier in the script.
    leadership <- within(leadership, {
      grade <- NA
      grade[q1 > 3] <- "good"
      grade[q1 <= 3] <- "bad"
    })
    
    # Changing column names and inspecting row names
    names(leadership)[2] <- "testdate"  # rename the second column
    names(leadership)
    row.names(leadership)               # row names
    # Install plyr only when it is missing, rather than hitting the package
    # repository on every run of the script.
    if (!requireNamespace("plyr", quietly = TRUE)) {
      install.packages("plyr")
    }
    library(plyr)
    # The result is only printed here; it is not assigned back to leadership.
    rename(leadership, c(manager = "managerID"))
    
    # datetime ####

    Sys.Date()  # today's date

    date()      # current date and time

    # Without a format argument the strings must be in the default layout.
    mydate <- as.Date(c("2018-01-12", "2008-01-11"))
    mydate
    class(mydate)

    # or ####
    # Character input in a non-default layout, parsed with an explicit format.
    strdate <- c("2018/01/12", "2008/01/12")
    dates <- as.Date(strdate, "%Y/%m/%d")
    dates  # printed in the default format
    # Rebuild the leadership data with the dates still stored as text.
    manager <- c(1, 2, 3, 4, 5)
    date <- c("10/24/14", "10/28/14", "10/01/14", "10/02/14", "05/01/14")
    country <- c("M", "F", "F", "M", "F")
    age <- c(32, 45, 25, 39, 99)
    q1 <- c(5, 3, 3, 3, 2)
    q2 <- c(4, 5, 4, 3, 2)
    q3 <- c(5, 2, 5, 4, 1)
    q4 <- c(5, 5, 5, NA, 2)
    q5 <- c(5, 5, 2, NA, 1)
    leadership <- data.frame(
      manager, date, country, age, q1, q2, q3, q4, q5,
      stringsAsFactors = FALSE
    )
    # format ####

    # Month/day/two-digit-year layout used by the date column.
    myformat <- "%m/%d/%y"
    leadership$date <- as.Date(leadership$date, myformat)
    leadership$date
    class(leadership$date)
    View(leadership)
    
    date <- c("10/24/14", "10/28/14", "10/01/14", "10/02/14", "05/01/14")
    date <- as.Date(date, "%m/%d/%y")
    date
    format(date, format = "%m-%d-%Y")  # change how the dates are displayed
    format(date, format = "%m/%d/%Y")
    # or
    today <- Sys.Date()
    format(today, format = "%a")  # abbreviated weekday name

    format(today, format = "%A")  # full weekday name

    # Elapsed time: subtract the earlier date from the later one so the
    # interval comes out positive (the two dates used to be the wrong way
    # round, which produced a negative number of days).
    startdate <- as.Date("1994-01-21")
    enddate <- today
    days <- enddate - startdate
    days
    # difftime
    anniversary <- as.Date("2012-10-29")
    difftime(today, anniversary, units = "days")
    # Approximate number of years since startdate, computed from the interval
    # itself instead of a hard-coded day count.
    as.numeric(days) / 365
    today <- as.character(today)
    today
    # More: help("as.Date"), help("strftime"), and the timeDate package.
    
    # order ####
    rm(list = ls())
    manager <- c(1, 2, 3, 4, 5)
    date <- c("10/24/14", "10/28/14", "10/01/14", "10/02/14", "05/01/14")
    country <- c("US", "US", "UK", "UK", "UK")
    gender <- c("M", "F", "F", "M", "F")
    age <- c(32, 45, 25, 39, 99)
    q1 <- c(5, 3, 3, 3, 2)
    q2 <- c(4, 4, 4, 3, 2)
    q3 <- c(5, 2, 5, 4, 1)
    q4 <- c(5, 5, 5, NA, 2)
    q5 <- c(5, 5, 2, NA, 1)
    leadership <- data.frame(manager, date, country, gender, age,
                             q1, q2, q3, q4, q5,
                             stringsAsFactors = FALSE)

    # Sort rows by age: descending first, then ascending.
    newdata <- leadership[order(-leadership$age), ]
    newdata
    newdata <- leadership[order(leadership$age), ]
    newdata
    # or: sort on several keys. with() resolves the column names inside the
    # data frame, so no attach()/detach() pair is needed.
    newdata <- leadership[with(leadership, order(gender, age)), ]
    newdata   # ascending age within each gender
    newdata1 <- leadership[with(leadership, order(gender, -age)), ]
    newdata1  # descending age within each gender
    
    
    # merge dataset ####
    rm(list = ls())
    # First table: demographics plus the first two answers.
    manager <- c(1, 2, 3, 4, 5)
    country <- c("US", "US", "UK", "UK", "UK")
    gender <- c("M", "F", "F", "M", "F")
    age <- c(32, 45, 25, 39, 99)
    q1 <- c(5, 3, 3, 3, 2)
    q2 <- c(4, 4, 4, 3, 2)
    leadership1 <- data.frame(
      manager, country, gender, age, q1, q2,
      stringsAsFactors = FALSE
    )

    # Second table: same managers, with the date and the remaining answers.
    manager <- c(1, 2, 3, 4, 5)
    date <- c("10/24/14", "10/28/14", "10/01/14", "10/02/14", "05/01/14")
    q3 <- c(5, 2, 5, 4, 1)
    q4 <- c(5, 5, 5, NA, 2)
    q5 <- c(5, 5, 2, NA, 1)
    leadership2 <- data.frame(
      manager, date, q3, q4, q5,
      stringsAsFactors = FALSE
    )

    View(leadership1)
    View(leadership2)
    # Join the two tables on the shared key column.
    total <- merge(leadership1, leadership2, by = "manager")

    # Side-by-side binding of the columns; this replaces the merged result.
    total <- cbind(data.frame(leadership1), data.frame(leadership2))
    View(total)
    
    # subset ####
    rm(list = ls())
    manager <- c(1, 2, 3, 4, 5)
    date <- c("10/24/14", "10/28/14", "10/01/14", "10/02/14", "05/01/14")
    country <- c("US", "US", "UK", "UK", "UK")
    gender <- c("M", "F", "F", "M", "F")
    age <- c(32, 45, 25, 39, 99)
    q1 <- c(5, 3, 3, 3, 2)
    q2 <- c(4, 4, 4, 3, 2)
    q3 <- c(5, 2, 5, 4, 1)
    q4 <- c(5, 5, 5, NA, 2)
    q5 <- c(5, 5, 2, NA, 1)
    leadership <- data.frame(manager, date, country, gender, age,
                             q1, q2, q3, q4, q5,
                             stringsAsFactors = FALSE)

    leadership["q1"]  # a one-column data frame

    # Keeping columns: by position ...
    newdata <- leadership[, 6:10]
    newdata
    newdata1 <- leadership[, c(6:10)]
    newdata1
    # or by name
    mycars <- c("q1", "q2", "q3", "q4", "q5")
    newdata2 <- leadership[mycars]
    newdata2
    # or with generated names
    mycars1 <- paste0("q", 1:5)
    newdata3 <- leadership[mycars1]
    newdata3

    # Dropping columns: logical mask of the names to exclude ...
    mycars2 <- names(leadership) %in% c("q3", "q4")
    mycars2
    newdata4 <- leadership[!mycars2]
    newdata4
    # or negative positions
    newdata5 <- leadership[c(-8, -9)]
    newdata5
    # or assigning NULL. This is done on a copy: deleting q3/q4 from leadership
    # itself makes the subset(..., select = c(q1, q2, q3, q4)) call further
    # down fail, because those columns no longer exist.
    trimmed <- leadership
    trimmed$q3 <- trimmed$q4 <- NULL
    names(trimmed)

    # Selecting rows and columns by position.
    newdata <- leadership[1:3, ]
    newdata
    newdata <- leadership[, 1:3]
    newdata
    # or rows by a logical condition
    newdata <- leadership[leadership$gender == "M" &
                            leadership$age > 30, ]
    newdata
    # or the same condition evaluated inside the data frame; with() replaces
    # the attach()/detach() pair.
    newdata <- leadership[with(leadership, gender == "M" & age > 30), ]

    # Rows whose date falls inside a closed interval.
    leadership$date <- as.Date(leadership$date, "%m/%d/%y")
    startdate <- as.Date("2014-10-02")
    enddate <- as.Date("2014-10-25")
    in_range <- leadership$date >= startdate & leadership$date <= enddate
    newdata <- leadership[which(in_range), ]
    newdata

    # 4-10-4 subset() ####
    newdata <- subset(leadership, age > 30 & gender == "M",
                      select = c(q1, q2, q3, q4))
    newdata6 <- subset(leadership, age > 30 & age < 50, select = c(q1:q5))
    newdata6
    newdata7 <- subset(leadership, age > 50 | age < 30, select = gender:q1)
    newdata7

    # Sample three random columns. Sampling is with replacement, so the same
    # column can be drawn more than once. The column count is taken from the
    # data frame instead of being hard-coded.
    mysample <- leadership[sample(seq_len(ncol(leadership)),
                                  size = 3, replace = TRUE)]
    mysample
    patientID <- c(1, 2, 3, 4)
    age <- c(25, 34, 28, 52)
    diabetes <- c("TYPE1", "TYPE2", "TYPE1", "TYPE1")
    status <- c("Poor", "Improved", "Excellent", "Poor")
    # Spell the argument out as `ordered`; `order =` only works through
    # partial argument matching.
    status <- factor(status, ordered = TRUE)
    diabetes <- factor(diabetes)
    class(status)
    patientdata <- data.frame(patientID, age, diabetes, status)
    # List the column names only after patientdata has been created: the
    # workspace is cleared at the start of the subset section, so calling
    # ls(patientdata) before this point fails with "object not found".
    ls(patientdata)
    # Three distinct random columns; seq_len(ncol()) stays valid for any width.
    mysample <- patientdata[sample(seq_len(ncol(patientdata)),
                                   size = 3, replace = FALSE)]
    mysample

    nrow(leadership)
    
    # chapter 3 ####
    # with() gives the plotting calls access to the mtcars columns without
    # attach()/detach(), so the search path is never left modified.
    with(mtcars, {
      plot(wt, mpg)
      abline(lm(mpg ~ wt))  # add the line of best fit
      title("regression of mpg on weight")
    })

    # The same figure, written to a PDF file instead of the screen device.
    pdf("mygraph.pdf")
    with(mtcars, {
      plot(wt, mpg)
      abline(lm(mpg ~ wt))  # add the line of best fit
      title("regression of mpg on weight")
    })
    dev.off()
    

      

    Valar morghulis
  • 相关阅读:
    互动教程,让你5分钟掌握 Flexbox 布局模式
    Fixed Responsive Nav – 响应式的单页网站导航插件
    创意无限!一组网页边栏过渡动画【附源码下载】
    12款界面精美的 HTML5 & CSS3 网站模板
    Twproject Gantt – 开源的 JavaScript 甘特图组件
    真是好东西!一组动感的页面加载动画效果
    Method Draw – 很好用的 SVG 在线编辑器
    CSS Vocabulary – CSS 词汇表,你都掌握了吗?
    前端精选文摘:BFC 神奇背后的原理
    Vis.js – 基于浏览器的动态 JavaScript 可视化库
  • 原文地址:https://www.cnblogs.com/super-yb/p/11041419.html
Copyright © 2011-2022 走看看