zoukankan      html  css  js  c++  java
  • R语言数据可视化部分

    数据挖掘

    R语言数据可视化部分

    散点图

    # Basic scatter plots with base graphics, using the built-in `cars` data
    # and three small numeric vectors (a, b, c) that later sections reuse.
    library(datasets)
    head(cars)
    plot(cars)

    # dist on the x-axis, speed on the y-axis. xlim = c(0, 20) restricts the
    # visible x-range, so any point with dist > 20 falls outside the plot.
    plot(cars$dist, cars$speed, xlab = "dist", ylab = "speed", xlim = c(0, 20))

    # with() evaluates the call inside `cars` without attach(); attach() was
    # never undone and left `dist` and `speed` on the search path.
    with(cars, plot(dist, speed, xlab = "dist", ylab = "speed"))

    # Points joined by lines, with a title, cross symbols and a red pen.
    plot(cars$dist, cars$speed, xlab = "dist", ylab = "speed",
         main = "cars 散点图", type = "b", pch = 4, lwd = 2, col = "red")

    a <- c(20, 30, 40, 50, 60)
    b <- c(11, 22, 33, 44, 66)
    c <- c(15, 25, 35, 45, 55)

    # The `type` argument selects how the data are drawn.
    plot(a, b, type = "b")  # points joined by line segments
    plot(a, b, type = "o")  # points with the line drawn through them
    plot(a, b, type = "p")  # points only
    plot(a, b, type = "h")  # vertical lines showing each value
    plot(a, b, type = "l")  # line only
    plot(a, b, type = "s")  # stair steps
    plot(a, b, type = "S")  # stair steps (other step style)
    plot(a, b, type = "n")  # nothing drawn, axes only

    # The `pch` argument selects the plotting symbol.
    for (symbol in 0:3) {
      plot(a, b, pch = symbol)
    }

    # lty/lwd only have a visible effect once lines are drawn (type = "b").
    plot(a, b, lty = 2, lwd = 2, pch = 15, cex = 2)
    plot(a, b, type = "b", lty = 2, lwd = 2, pch = 15, cex = 2)

     

    # Graphical parameters: colour, par() settings, and a plot with two
    # y-axes. Uses the vectors a, b and c defined earlier in the script.

    # Install RColorBrewer only when it is missing, so re-running the script
    # does not reinstall it every time.
    if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
      install.packages("RColorBrewer")
    }
    library(RColorBrewer)
    plot(a, b, type = "b", lty = 2, lwd = 2, pch = 15, cex = 2, col = 2)

    # Save only the settable parameters. With no.readonly = FALSE the saved
    # list also contains read-only entries and par(opar) warns about them.
    opar <- par(no.readonly = TRUE)
    par(pin = c(2, 3))                   # plot region size in inches
    par(lwd = 2, cex = 1.5)              # thicker lines, larger symbols
    par(cex.axis = .75, font.axis = 3)   # smaller, italic axis text
    plot(a, b, type = "b", pch = 19, lty = 2, col = 2)
    plot(a, b, type = "b", pch = 23, lty = 5, col = 2, bg = "green")
    par(opar)                            # restore the saved settings

    # 2 x 2 grid of plots.
    par(mfrow = c(2, 2))
    plot(a, c, type = "o", col = 3, lty = 3, pch = 3, lwd = 4,
         main = "实验图形", xlab = "a", ylab = "c",
         xlim = c(0, 100), ylim = c(0, 100))

    # Two series on one plot, each with its own y-axis.
    x <- c(1:15)
    y <- x
    z <- 10 / x
    # The state saved here includes the mfrow = c(2, 2) set above.
    opar <- par(no.readonly = TRUE)
    par(mar = c(5, 4, 4, 8) + 0.1)       # wider right margin for the 2nd axis
    plot(x, y, type = "b", pch = 21, col = 2, yaxt = "n", lty = 3, ann = FALSE)
    lines(x, z, type = "b", pch = 3, col = 4, lty = 2)
    axis(side = 2, at = x, labels = x, col.axis = 2, las = 2)
    axis(side = 4, at = z, labels = round(z, digits = 2),
         cex.axis = 2, las = 2)
    par(opar)

     

    # Data-frame basics on the built-in `attenu` data: subsetting, converting
    # to a list, appending values, and merging two small data frames.
    library(datasets)  # load the datasets package
    attenu             # print the data

    # subset() evaluates its condition inside the data frame, so columns can
    # be named directly and no attach() is needed.
    subset(attenu, dist >= 40)  # rows whose dist is at least 40

    # Compare the numeric columns with numbers rather than with strings.
    df <- subset(attenu, event == 2 & dist == 107, select = c(dist))
    df  # matching rows, dist column only
    s <- subset(attenu, event == 2 & dist == 107, select = c(event, dist))
    s   # same filter, keeping two columns

    df1 <- as.list(attenu)
    df1  # the data frame as a list of columns
    append(df1$event, 123)  # add 123 at the end of the event vector
    # `after` selects the insert position; a value past the end of the list
    # simply appends at the end.
    append(df1, 1235, after = 55)

    data1 <- data.frame(Id = c(1:6),
                        Product = c(rep("Toaster", 3), rep("Radio", 3)))
    data2 <- data.frame(Id = c(2, 4, 6, 7),
                        State = c(rep("Alabama", 3), rep("Ohio", 1)))

    merge(data1, data2, all = TRUE)    # union: every Id from either frame
    merge(data1, data2, all = FALSE)   # intersection: Ids present in both
    merge(data1, data2, all.x = TRUE)  # keep every row of data1
    # With no `by`, merge() joins on the columns the two frames share
    # (here only Id), so the next two calls give the same result.
    merge(data1, data2)
    merge(data1, data2, by = "Id")

    #aggregate()函数

     

     

     

     

     

     

     

    # aggregate(), table() and the apply family on the built-in mtcars data.
    # Columns are referenced through mtcars explicitly instead of attach().
    mtcars       # car data set shipped with R
    str(mtcars)  # structure: column names and types

    # Mean of every column for each combination of cyl and gear.
    aggregate(mtcars, by = list(mtcars$cyl, mtcars$gear), FUN = mean)

    # The formula interface aggregates only the listed columns (mpg and hp).
    aggregate(cbind(mpg, hp) ~ cyl + gear, data = mtcars, FUN = mean)

    # Frequency table of the values in the hp column.
    table(hp = mtcars$hp)

    # apply() over a matrix: margin 1 = rows, margin 2 = columns.
    rname <- c("r1", "r2", "r3")
    cname <- c("c1", "c2")
    b <- matrix(1:6, nrow = 3, dimnames = list(rname, cname))
    apply(b, 1, sum)  # row sums
    apply(b, 2, sum)  # column sums

    # lapply() returns a list; here each string is converted to lower case.
    m <- c("SPYDERMAN", "BATMAN", "VERTIGO", "CHINATOWN")
    m1 <- lapply(m, tolower)
    m1

    # Minimum of every column; vapply() fixes the result type to numeric.
    vapply(mtcars, min, numeric(1))

    # Mean mpg per cylinder count. Grouping mpg by mpg itself only echoes
    # each distinct value back, so the index is a separate grouping column.
    tapply(mtcars$mpg, INDEX = mtcars$cyl, FUN = mean)

     

     

     

    条形图

    # Bar charts with base graphics, using the Arthritis data from package
    # vcd, plus a spine plot from vcd.

    # Install the add-on packages only when missing. The original list
    # contained the misspelled name "plottrix"; the package is "plotrix".
    for (pkg in c("vcd", "plotrix", "sm", "vioplot")) {
      if (!requireNamespace(pkg, quietly = TRUE)) {
        install.packages(pkg)
      }
    }
    library(vcd)

    a <- table(Arthritis$Treatment)
    a
    hist(a)  # histogram of the count values held in the table

    # Vertical, then horizontal, bar chart of the treatment counts.
    barplot(a, main = "条形图", ylab = "Frequency",
            xlab = "Treatment", ylim = c(0, 50))
    barplot(a, main = "条形图", ylab = "Treatment",
            xlab = "Frequency", xlim = c(0, 50), horiz = TRUE)
    # plot() on a factor also draws a bar chart.
    plot(Arthritis$Improved, main = "条形图", xlab = "Improved",
         ylab = "Frequency")

    counts <- table(Arthritis$Improved, Arthritis$Treatment)
    counts

    # Stacked bars.
    barplot(counts, main = "条形图", xlab = "Treatment",
            ylab = "Frequency", col = c("red", "yellow", "green"),
            legend = rownames(counts))

    # Grouped (side-by-side) bars.
    barplot(counts, main = "条形图", xlab = "Treatment",
            ylab = "Frequency", col = c("red", "yellow", "green"),
            legend = rownames(counts),
            beside = TRUE)

    # Bar chart of group means: mean Illiteracy per state region.
    states <- data.frame(state.region, state.x77)
    means <- aggregate(states$Illiteracy,
                       by = list(state.region),
                       FUN = mean)
    means
    means <- means[order(means$x), ]  # sort by the mean
    means
    barplot(means$x, names.arg = means$Group.1)
    title("Mean Illiteracy Rate")

    # Tuned bar chart. The par() changes are undone afterwards so the wider
    # margin and rotated labels do not leak into later plots.
    bar_par <- par(no.readonly = TRUE)
    par(mar = c(5, 8, 4, 2))  # wider left margin for the long labels
    par(las = 2)              # rotate the axis labels
    counts <- table(Arthritis$Improved)
    barplot(counts, main = "Treatment Outcome", horiz = TRUE,
            cex.names = 0.8,
            names.arg = c("No Improvement",
                          "Some Improvement", "Marked Improvement"))
    par(bar_par)

    # Spine plot (vcd::spine) to compare proportions. with() replaces the
    # attach()/detach() pair and keeps the Treatment/Improved dimension names.
    counts <- with(Arthritis, table(Treatment, Improved))
    spine(counts, main = "棘状图")

     

    饼图

    # Pie charts: plain, labelled with percentages, 3D (plotrix::pie3D),
    # and a fan plot (plotrix::fan.plot).
    par(mfrow = c(2, 2))

    lbls <- c("A", "B", "C", "D", "E")
    s <- c(10, 12, 4, 16, 8)
    pie(s, labels = lbls, main = "饼图")

    # Same pie with each slice's share, rounded to a whole percent, appended
    # to its label.
    pct <- round(s / sum(s) * 100)
    lbls2 <- paste0(lbls, " ", pct, "%")
    pie(s, labels = lbls2, col = rainbow(length(lbls)),
        main = "饼图")

    install.packages("plotrix")
    library(plotrix)
    pie3D(s, labels = lbls, explode = 0.1, main = "3D 饼图 ")

    # Pie built from a frequency table; labels show the level and its count.
    mydata <- table(state.region)
    lbls <- paste0(names(mydata), " ", mydata)
    pie3D(mydata, labels = lbls, main = "饼图1")

    # Fan plot.
    slices <- c(10, 12, 4, 16, 8)
    lbls <- c("US", "UK", "Australia", "Germany", "France")
    fan.plot(slices, labels = lbls, main = "扇形图")

     

    散点图

    # Scatter plot of mpg against wt with a linear fit and a lowess smoother,
    # followed by car::scatterplot() grouped by number of cylinders.
    par(mfrow = c(2, 2))

    # with() gives the calls access to the mtcars columns without another
    # attach(mtcars), and keeps the wt/mpg names the same inside the calls.
    with(mtcars, {
      plot(wt, mpg,
           main = "Basic Scatterplot of MPG vs. Weight",
           xlab = "Car Weight (lbs/1000)",
           ylab = "Miles Per Gallon ", pch = 19)
      abline(lm(mpg ~ wt), col = "red", lwd = 2, lty = 1)      # linear fit
      lines(lowess(wt, mpg), col = "blue", lwd = 2, lty = 2)   # smoother
    })

    # Related smoothers: loess(), lowess()
    library(car)
    # NOTE(review): id.method, legend.plot and labels look like the argument
    # names of older car releases; newer releases may expect id = list(...)
    # and legend = ... instead -- confirm against the installed version.
    # NOTE(review): `at` is passed through unchanged; its effect here is not
    # evident from this script.
    scatterplot(mpg ~ wt | cyl, at = mtcars$cyl, data = mtcars, lwd = 2,
                main = "Scatter Plot of MPG vs. Weight by  Cylinders",
                xlab = "Weight of Car (lbs/1000)",
                ylab = "Miles Per Gallon", id.method = "identify",
                legend.plot = TRUE, labels = row.names(mtcars),
                boxplots = "xy")

     

     

    # Scatter-plot matrices with base pairs() and car::scatterplotMatrix().
    # Every call names its data explicitly, so attach(mtcars) is not needed.
    pairs(~ mpg + disp + wt + drat, data = mtcars, main = "汽车数据矩阵散点图")

    library(car)
    # spread = FALSE: do not add the lines showing spread and asymmetry.
    # smoother.args = list(lty = 2): draw the fitted smooth curve dashed.
    # NOTE(review): spread/smoother.args look like the argument names of
    # older car releases -- confirm against the installed version.
    scatterplotMatrix(~ mpg + disp + drat + wt, data = mtcars, spread = FALSE,
                      smoother.args = list(lty = 2),
                      main = "散点图矩阵")

    scatterplotMatrix(~ mpg + disp + drat + wt, data = mtcars, spread = TRUE,
                      smoother.args = list(lty = 1),
                      main = "散点图矩阵")

    # Other packages offering scatter-plot matrices in different styles.
    # Fixes: the package is "gclus" (not "glus"), and the last call must be
    # install.packages(), not installed.packages() which only lists packages.
    for (pkg in c("gclus", "TeachingDemos", "HH", "ResourceSelection")) {
      if (!requireNamespace(pkg, quietly = TRUE)) {
        install.packages(pkg)
      }
    }
    library(gclus)
    library(TeachingDemos)
    # pairs2() plots the columns of its first argument against the columns
    # of its second; it cannot be called without data.
    pairs2(mtcars[, c("mpg", "disp")], mtcars[, c("drat", "wt")])

     

    # High-density scatter plots: four ways to draw the same 10,000 points
    # (two overlapping normal clusters of 5,000 points each).
    set.seed(1234)
    n <- 10000
    c1 <- matrix(rnorm(n, mean = 0, sd = 0.5), ncol = 2)  # tight cluster
    c2 <- matrix(rnorm(n, mean = 3, sd = 2), ncol = 2)    # wide cluster
    mydata <- as.data.frame(rbind(c1, c2))
    names(mydata) <- c("x", "y")

    # 1. Plain scatter plot: the points overplot heavily.
    with(mydata, plot(x, y, pch = 19, main = "高密度散点图"))

    # 2. Smoothed colour density representation.
    with(mydata, smoothScatter(x, y, main = "高密度散点图"))

    # 3. Hexagonal binning.
    library(hexbin)
    with(mydata, {
      bin <- hexbin(x, y, xbins = 50)
      plot(bin, main = "Hexagonal Binning with 10,000 Observations")
    })

    # 4. Image scatter plot from IDPmisc, with colour indicating density.
    library(IDPmisc)
    with(mydata,
         iplot(x, y, main = "Image Scatter Plot with Color Indicating Density"))

    # Restore the graphical parameters saved in `opar` earlier in the script.
    par(opar)

     

    # 3D scatter plots of mtcars: static (scatterplot3d) and rotatable (rgl).
    # with() replaces the attach()/detach()/attach() sequence, which left
    # mtcars attached at the end of the section.
    if (!requireNamespace("scatterplot3d", quietly = TRUE)) {
      install.packages("scatterplot3d")
    }
    library(scatterplot3d)

    with(mtcars, scatterplot3d(wt, disp, mpg, main = "3d 散点图"))

    # Filled points (pch = 16), colour highlighting (highlight.3d = TRUE)
    # and vertical drop lines (type = "h").
    with(mtcars,
         scatterplot3d(wt, disp, mpg,
                       pch = 16,
                       highlight.3d = TRUE,
                       type = "h",
                       main = "3D 散点图"))

    # Same plot again, keeping the returned object so that a regression
    # plane can be added to it.
    s3d <- with(mtcars,
                scatterplot3d(wt, disp, mpg,
                              pch = 16,
                              highlight.3d = TRUE,
                              type = "h",
                              main = "3D 散点图"))
    fit <- lm(mpg ~ wt + disp, data = mtcars)
    s3d$plane3d(fit)

    # Rotatable 3D scatter plot.
    if (!requireNamespace("rgl", quietly = TRUE)) {
      install.packages("rgl")
    }
    library(rgl)
    with(mtcars, plot3d(wt, disp, mpg, col = "blue", size = 5))

     

    折线图

    # Line chart of circumference against age, one line per tree, using the
    # built-in Orange data.
    Orange$Tree <- as.numeric(Orange$Tree)  # convert the tree id to numeric
    ntrees <- max(Orange$Tree)
    ntrees

    # range() returns the minimum and maximum of its argument.
    xrange <- range(Orange$age)
    xrange
    yrange <- range(Orange$circumference)
    yrange

    # Empty plot (type = "n") that only sets up the axes.
    plot(xrange, yrange,
         type = "n",
         xlab = "Age (days)",
         ylab = "Circumference (mm)")

    # One colour, line type and plotting symbol per tree.
    colors <- rainbow(ntrees)      # ntrees adjacent rainbow colours
    linetype <- seq_len(ntrees)    # line types 1..ntrees
    # Symbols 18, 19, ... -- exactly one per tree. seq(18, 18 + ntrees, 1)
    # produced ntrees + 1 values, one more than there are trees.
    plotchar <- seq(18, length.out = ntrees)
    plotchar

    # Draw one line per tree.
    for (i in seq_len(ntrees)) {
      tree <- subset(Orange, Tree == i)
      lines(tree$age, tree$circumference,
            type = "b",
            lwd = 2,
            lty = linetype[i],
            col = colors[i],
            pch = plotchar[i])
    }

    title("树子的生长折线图", "绘图例子")

    # Legend at the top-left corner of the data range; cex = 0.8 scales the
    # legend text to 80 %.
    legend(xrange[1], yrange[2],
           seq_len(ntrees),
           cex = 0.8,
           col = colors,
           pch = plotchar,
           lty = linetype,
           title = "Tree")

     

    不同包作图比较

    # The same three plots (scatter, histogram, box plot) drawn with three
    # packages: graphics, lattice and ggplot2.
    # NOTE(review): the path has no "/" after the drive letter, which makes
    # it relative to the current directory on drive J: -- confirm whether
    # "J:/shuju/Facet_Data.csv" was intended.
    data <- read.csv("J:shuju/Facet_Data.csv", sep = ",", header = TRUE)
    str(data)

    # --- graphics ---------------------------------------------------------
    # with() keeps the short column names as default axis labels without
    # attaching the data frame.
    library(graphics)
    with(data, plot(SOD, tau, main = "实验散点图"))
    with(data, hist(SOD, breaks = 30, ylim = c(0, 40), main = ""))
    boxplot(SOD ~ Class, data = data, xlab = "Class", ylab = "SOD")

    # --- lattice ----------------------------------------------------------
    library(lattice)
    # Formula is y ~ x: tau on the y-axis and SOD on the x-axis, matching
    # the graphics and ggplot2 versions of this scatter plot.
    xyplot(tau ~ SOD, data = data, col = "black", main = "实验散点图")
    histogram(~ SOD, data, type = "count", nint = 30, col = "white")
    bwplot(SOD ~ Class, data, xlab = "Class",
           par.settings = canonical.theme(color = FALSE))

    # --- ggplot2 ----------------------------------------------------------
    library(ggplot2)
    # geom_point() has no `main` parameter; the title is added as its own
    # layer with ggtitle().
    ggplot(data, aes(x = SOD, y = tau)) +
      geom_point(shape = 21) +
      ggtitle("实验散点图")
    ggplot(data, aes(SOD)) +
      geom_histogram(bins = 30, colour = "black", fill = "white")
    ggplot(data, aes(x = Class, y = SOD)) +
      geom_boxplot()

     

     

     

    核密度函数图

    # Kernel density plots of mtcars$mpg: a single density, then one density
    # per cylinder count drawn with sm::sm.density.compare().
    mtcars
    par(mfrow = c(2, 1))

    # with() keeps the short name `mpg` in the call recorded by density(),
    # which plot() shows as the default title.
    d <- with(mtcars, density(mpg))
    plot(d)

    plot(d, main = "Kernel Density of Miles Per Gallon")
    polygon(d, col = "red", border = "blue")  # fill the area under the curve
    rug(mtcars$mpg, col = "brown")            # tick marks at the data values

    # Densities of mpg compared across the cylinder groups.
    library(sm)
    a <- factor(mtcars$cyl, levels = c(4, 6, 8),
                labels = c("4A", "6A", "8A"))
    sm.density.compare(mtcars$mpg, mtcars$cyl, xlab = "hfhhj")
    title(main = "hyhbhkajbkjhu")

    # One fill colour per group, starting at colour 2. The original stored
    # these in `c` and then called length() with legend()'s arguments and an
    # undefined `colfill`, which fails; legend() is the intended call.
    colfill <- c(2:(1 + length(levels(a))))
    # locator(1) lets the user click the legend position; it needs an
    # interactive device, so fall back to a fixed corner otherwise.
    legend_pos <- if (interactive()) locator(1) else "topright"
    legend(legend_pos, levels(a), fill = colfill)

     

    马赛克图

    # Mosaic plot of the Titanic contingency table (vcd::mosaic).
    ###############################################################
    # Note on the `shade` argument (as described by the original  #
    # comment): a logical specifying whether `gp` should be used  #
    # (see `gp`). If TRUE and `expected` is unspecified, a default #
    # model is fitted: a conditional independence model when      #
    # `condvars` is specified (see strucplot), otherwise a total  #
    # independence model.                                         #
    ###############################################################
    ftable(Titanic)# Flat contingency table of the Titanic data.
    library(vcd)
    library(grid)
    mosaic(Titanic, shade=TRUE, legend=TRUE)

    箱线图

    # Box plots of mtcars$mpg: overall, by cylinder count, and by the
    # combination of transmission type and cylinder count. The single
    # unqualified column reference is written as mtcars$mpg, so the
    # attach(mtcars) that was never detached is no longer needed.
    boxplot(mtcars$mpg, main = "箱线图", ylab = "gh")

    # mpg compared across the cylinder groups.
    boxplot(mpg ~ cyl, data = mtcars, main = "jjgjh",
            xlab = "hjklk", ylab = "ghjgkh")

    # Same comparison with notches and box widths that vary with group size
    # (varwidth = TRUE).
    boxplot(mpg ~ cyl, data = mtcars, notch = TRUE,
            varwidth = TRUE, col = "red",
            main = "Car Mileage Data",
            xlab = "Number of Cylinders",
            ylab = "Miles Per Gallon")

    # Box plots for two crossed factors: cylinder count and transmission.
    mtcars$cyl.f <- factor(mtcars$cyl,
                           levels = c(4, 6, 8),
                           labels = c("4", "6", "8"))
    mtcars$am.f <- factor(mtcars$am, levels = c(0, 1),
                          labels = c("auto", "standard"))
    boxplot(mpg ~ am.f * cyl.f, data = mtcars,
            varwidth = TRUE, col = c("gold", "darkgreen"),
            main = "MPG Distribution by Auto Type",
            xlab = "Auto Type", notch = TRUE)

     

     

     

    相关图

    # Correlograms of mtcars with package corrgram.
    options(digits = 5)  # number of significant digits printed
    cor(mtcars)

    library(corrgram)
    # Arguments used below, as described by the original notes:
    #   order        whether the variables are reordered
    #   panel        function that draws the content of each panel
    #   lower.panel / upper.panel
    #                separate panel functions below / above the diagonal
    #   diag.panel   panel function for the diagonal
    #   text.panel   panel function that adds the labels
    #   labels       labels for the diagonal panels (instead of the
    #                variable names); reordered too when `order` is used
    #   label.pos    horizontal and vertical label position in the
    #                diagonal panels
    #   label.srt    string rotation of the diagonal labels
    #   cex.labels, font.labels
    #                size and font of the diagonal labels
    corrgram(mtcars, order = TRUE, lower.panel = panel.shade,
             upper.panel = panel.pie, text.panel = panel.txt,
             main = "mtcars相互关系的相关图")

    corrgram(mtcars, order = TRUE, lower.panel = panel.ellipse,
             upper.panel = panel.pts, text.panel = panel.txt,
             diag.panel = panel.minmax,
             main = "使用散点图和椭圆的mtcar数据的相关图")

    corrgram(mtcars, lower.panel = panel.shade,
             upper.panel = NULL, text.panel = panel.txt,
             main = "汽车里程数据(未排序)相关图")

    # Build a palette of `ncol` colours interpolated between four named
    # colours. colorRampPalette() returns a function that takes the number
    # of colours wanted.
    col.corrgram <- function(ncol) {
      colorRampPalette(c("darkgoldenrod4", "burlywood1",
                         "darkkhaki", "darkgreen"))(ncol)
    }

    # The custom palette has to be handed to corrgram() through col.regions;
    # defining col.corrgram alone leaves the default colours in place.
    corrgram(mtcars, order = TRUE, lower.panel = panel.shade,
             upper.panel = panel.pie, text.panel = panel.txt,
             col.regions = col.corrgram,
             main = "拥有不同颜色的相关图")

     

     

     

  • 相关阅读:
    【Nginx】ngx_event_core_module模块
    ELMAH--Using HTTP Modules and Handlers to Create Pluggable ASP.NET Components 77 out of 90 rated th
    nyist oj 214 单调递增子序列(二) (动态规划经典)
    java 入门书籍(java7)
    ARCGIS将WGS84坐标投影到高斯平面
    【linux】linux下对java程序生成dump文件,并使用IBM Heap Analyzer进行分析,查找定位内存泄漏的问题代码
    【springboot】【socket】spring boot整合socket,实现服务器端两种消息推送
    【linux】linux修改open file 大小
    【docker】docker限制日志文件大小的方法+查看日志文件的方法
    【docker】docker部署spring boot服务,但是docker logs查看容器输出控制台日志,没有日志打印,日志未打印,docker logs不打印容器日志
  • 原文地址:https://www.cnblogs.com/cxy-0902/p/12690580.html
Copyright © 2011-2022 走看看