zoukankan      html  css  js  c++  java
  • 用emoji表情包来可视化北京市历史天气状况!

    用emoji表情包来可视化北京市历史天气状况!

    最近有了一个突如其来的想法,主要是看到了R社区有大神做了emoji表情包,并已经打通了ggplot的链接,所以想用ggplot结合emoji表情做一期天气可视化!

    library(RCurl)
    library(XML)
    library(dplyr)
    library(stringr)
    library(tidyr)
    library(plyr)
    library(rvest)
    library(ggimage)
    library(Cairo)
    library(showtext)
    library(lubridate)
    

    以下是北京2016年全年日度历史天气的获取过程!

    # Collect the links to the 2016 monthly history pages from the Beijing
    # index page of lishi.tianqi.com.
    url <- "http://lishi.tianqi.com/beijing/index.html"
    myheader <- c("User-Agent"="Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36")
    # Raw page source fetched with a browser-like User-Agent. The link
    # extraction below does not read `webpage`; it is handed `url` directly.
    webpage <- getURL(url, httpheader = myheader)
    # Backslashes must be doubled inside an R string literal: a bare "\d" is an
    # unrecognized escape and stops the script from parsing. The dot before
    # "html" is escaped as well so it only matches a literal ".".
    mymonthlink <- getHTMLLinks(url, externalOnly = TRUE) %>%
      grep(".*?2016\\d{2}\\.html", ., value = TRUE)
    

    刚开始信誓旦旦的要用RCurl去爬,结果给我整蒙逼了,不是爬不了,数据弄出来太碎了,后来我用了rvest。

    ####
    #page1<-getURL(mymonthlink[2],.encoding="gbk")
    #rd<-iconv(page1,"gbk","utf-8")
    #rdhtml<-htmlParse(rd,encoding="UTF-8")
    #cesh<-readHTMLList(rdhtml,trim=TRUE,elFun=xmlValue)%>%grep("\d{4}-\d{2}-\d{2}",.,value=T)
    #cesh<-cesh%>%sub("([a-z])(\()(\)","",.)
    #cesh<-cesh1%>%str_split(',')%>%plyr::ldply(.fun=NULL)
    #cesh$V1<-cesh$V1%>%sub("[a-z]\(","",.)%>%as.Date()
    #names(cesh)<-c("date","high","low","state","wind","index")
    ####
    以上代码写了一半写不下去了,我有rvest为啥要用RCurl,肯定自己脑抽筋了!
    

    用了rvest就轻松多了!

    # Visit every monthly page and gather the text of each `div.tqtongji2>ul`
    # node into one character vector. The first node of each page is dropped
    # (presumably the table header row -- TODO confirm against the page).
    mynewdata <- unlist(
      lapply(mymonthlink, function(monthurl) {
        page <- read_html(monthurl, encoding = "gbk")
        rows <- html_text(html_nodes(page, "div.tqtongji2>ul"), trim = FALSE)
        str_trim(rows, side = "right")[-1]
      }),
      use.names = FALSE
    )
    

    爬出来弄成一个向量了,需要分列,其实可以直接使用节点区每一个变量的值,但是那样我觉得太麻烦!所以简单粗暴,爬到一起然后使用stringr去处理!

    # Keep the raw scraped strings so the cleaning step can be re-run without
    # scraping again.
    mynewdata1 <- mynewdata
    # Strip tabs and line breaks, split each record on runs of three spaces,
    # bind the pieces into a data frame and drop its second column.
    # The whitespace pattern is written with escape sequences: raw tab and
    # newline characters inside the literal are fragile, because any
    # re-indentation of the script silently changes the pattern.
    # NOTE(review): "\r?\n" accepts both Unix and Windows line endings --
    # confirm against the actual page source.
    mynewdata <- mynewdata1 %>%
      gsub("\t|\r?\n", "", .) %>%
      str_split('   ') %>%
      plyr::ldply(.fun = NULL) %>%
      .[, -2]
    names(mynewdata) <- c("date", "high", "low", "state", "wind", "index")
    # Convert the columns used for plotting from text to their proper types.
    mynewdata$date <- as.Date(mynewdata$date)
    mynewdata$high <- as.numeric(mynewdata$high)
    mynewdata$low <- as.numeric(mynewdata$low)
    

    将天气进行归类!

    # List the distinct weather descriptions that occur in the data.
    unique(mynewdata$state)

    # Weather descriptions grouped into four mood categories; the groups are
    # matched against `state` when the mood column is built.
    happy <- c(
      "晴", "阵雨~晴", "多云转晴", "多云~晴",
      "雷阵雨~晴", "阴~晴", "霾~晴", "浮尘~晴"
    )
    depressed <- c(
      "霾", "阴", "多云", "晴~多云", "霾~多云", "晴~霾", "多云~霾",
      "阵雨转多云", "多云转阴", "阴~多云", "多云~阴", "晴~阴",
      "阵雨~多云", "小雨~多云", "小雨~阴", "霾~雾",
      "小雪~阴", "阴~小雪", "小雨~雨夹雪"
    )
    angry <- c(
      "小雨", "雨夹雪", "小雪", "雷阵雨", "阵雨", "中雨", "小到中雨",
      "雷阵雨~阴", "多云~雷阵雨", "阴~雷阵雨", "霾~雷阵雨",
      "多云~阵雨", "晴~阵雨", "阴~小雨", "阵雨~小雨"
    )
    Terrified <- c(
      "中到大雨", "暴雨", "雷阵雨~中到大雨"
    )
    

    分类赋值!

    # Removes a column named "mode" if one exists. No such column is created in
    # the visible script, so this is presumably a typo for "mood" -- TODO confirm.
    mynewdata$mode <- NULL
    # Label every day with a mood. The labels are applied from lowest to
    # highest priority so that "happy" wins over "depressed", which wins over
    # "angry"; any state found in none of those three groups ends up "Terrified".
    mynewdata$mood <- local({
      weather <- mynewdata$state
      label <- ifelse(weather %in% angry, "angry", "Terrified")
      label <- ifelse(weather %in% depressed, "depressed", label)
      ifelse(weather %in% happy, "happy", label)
    })
    

    按照分类匹配emoji表情代码:

    # Translate each mood label into the emoji code string that is later
    # passed to geom_emoji() as the `image` aesthetic.
    mynewdata$mood_code <- local({
      emoji_for_mood <- c(
        happy     = "1f604",
        depressed = "1f633",
        angry     = "1f62d",
        Terrified = "1f621"
      )
      # Look the codes up by mood name; unname() keeps the column a plain
      # character vector.
      unname(emoji_for_mood[as.character(mynewdata$mood)])
    })
    

    创建多个时间变量!

    # Derive the calendar variables used by the plots: month, weekday, week of
    # the year, week within the month, and day of the month.
    mynewdata <- local({
      weather <- mynewdata
      # Break the dates into calendar fields once and reuse them.
      parts <- as.POSIXlt(weather$date)

      # $mon counts from 0, so add 1 to get the month number 1-12.
      weather$month <- as.numeric(parts$mon + 1)
      weather$monthf <- factor(
        weather$month,
        levels = as.character(1:12),
        labels = month.abb,
        ordered = TRUE
      )

      # $wday runs 0-6 with 0 = Sunday; the factor levels are listed from
      # Saturday back to Sunday.
      weather$weekday <- parts$wday
      weather$weekdayf <- factor(
        weather$weekday,
        levels = 6:0,
        labels = c("Sat", "Fri", "Thu", "Wed", "Tue", "Mon", "Sun"),
        ordered = TRUE
      )

      # "%W" is the week of the year with Monday as the first day of the week.
      weather$week <- as.numeric(format(weather$date, "%W"))
      # Week index inside each month, starting at 1 for the month's first week.
      weather <- ddply(
        weather, .(monthf), transform,
        monthweek = 1 + week - min(week)
      )

      # Computed after ddply() so it follows the row order ddply() returns.
      weather$day <- day(weather$date)
      weather
    })
    

    读写数据,最怕整理好了断网了或者关机了什么的,所以要时常做好备份!

    # Back the cleaned data up to disk and load it again, so later steps can
    # start from the CSV file instead of scraping again.
    write.table(mynewdata, "historyweather.csv", sep = ",", row.names = FALSE)
    mynewdata <- read.csv("historyweather.csv", stringsAsFactors = FALSE, check.names = FALSE)
    # A CSV round-trip keeps only text and numbers: the Date column and the two
    # ordered factors come back as plain character vectors, which would make
    # the month facets and the weekday axis sort alphabetically in the plots.
    # Restore the types and level orders the script set up before writing.
    mynewdata$date <- as.Date(mynewdata$date)
    mynewdata$monthf <- factor(mynewdata$monthf, levels = month.abb, ordered = TRUE)
    mynewdata$weekdayf <- factor(
      mynewdata$weekdayf,
      levels = c("Sat", "Fri", "Thu", "Wed", "Tue", "Mon", "Sun"),
      ordered = TRUE
    )
    

    图一的主题:

    # Theme for figure 1: blank rect elements, axis ticks and axis titles,
    # plain text at size 15, titles at size 30 left-aligned, and a plot margin
    # of 5/2/5/2 lines.
    mytheme <- local({
      nothing <- element_blank()
      # Plain-face text element; only size and horizontal justification vary.
      plain_text <- function(size, hjust) {
        element_text(
          face = "plain", lineheight = 0.9,
          hjust = hjust, vjust = 0.5, size = size
        )
      }
      theme(
        rect = nothing,
        axis.ticks = nothing,
        text = plain_text(15, 0.5),
        title = plain_text(30, 0),
        axis.title = nothing,
        strip.text = element_text(size = rel(0.8)),
        plot.margin = unit(c(5, 2, 5, 2), "lines")
      )
    })
    

    图一效果:

    # Figure 1: one calendar panel per month (weekday on x, week of the month
    # on y) with an emoji drawn on every day's tile, written to emoji1.png.
    CairoPNG("emoji1.png", 1000, 870)
    showtext.begin()
    # print() is explicit because a ggplot object is only drawn automatically
    # at the interactive top level; under source() or inside a function the
    # device would otherwise stay empty.
    print(
      ggplot(mynewdata, aes(weekdayf, monthweek, fill = high)) +
        geom_tile(colour = 'white') +
        # Both gradient ends are NA and the legend is switched off, so
        # presumably the tiles only contribute their white borders.
        # guide = "none" replaces the deprecated guide = FALSE.
        scale_fill_gradient(low = NA, high = NA, guide = "none") +
        ggtitle("The emoji-weather visualization of beijing in 2016") +
        scale_y_reverse(breaks = seq(from = 6, to = 0, by = -1)) +
        ggimage::geom_emoji(aes(image = mood_code), size = .1) +
        facet_wrap(~monthf, nrow = 3) +
        mytheme
    )
    showtext.end()
    dev.off()
    
    图二主题:
    # Theme for figure 2: the same settings as `mytheme`, except that the plot
    # margin is 1 line on every side.
    mytheme2 <- mytheme +
      theme(plot.margin = unit(c(1, 1, 1, 1), "lines"))
    

    图二效果:

    # Figure 2: the same data on polar coordinates (day of month around the
    # circle, month along the radius), written to emoji2.png.
    # NOTE(review): this working directory is specific to the author's
    # machine; emoji2.png is written there and "weather.png" is read from
    # there. Adjust the path before running elsewhere.
    setwd("F:/数据可视化/R/R语言学习笔记/可视化/ggplot2/商务图表")
    CairoPNG("emoji2.png", 1200, 1200)
    showtext.begin()
    # print() is explicit because a ggplot object is only drawn automatically
    # at the interactive top level; under source() or inside a function the
    # device would otherwise stay empty.
    print(
      ggplot(mynewdata, aes(x = factor(day), y = monthf, fill = high)) +
        geom_tile(colour = 'white') +
        expand_limits(y = c(-12, 12)) +
        scale_x_discrete(position = c("bottom")) +
        coord_polar(theta = "x") +
        # guide = "none" replaces the deprecated guide = FALSE.
        scale_fill_gradient(low = NA, high = NA, guide = "none") +
        ggimage::geom_emoji(aes(image = mood_code), size = .015) +
        # Picture placed at x = 0, y = -12, the lower end of the expanded
        # y range.
        geom_image(aes(x = 0, y = -12), image = "weather.png", size = .15) +
        ggtitle("The emoji-weather visualization of beijing in 2016") +
        mytheme2
    )
    showtext.end()
    dev.off()
    

    OK了,做完收工~

    作者简介:

    -------

    wechat:ljty1991 
    Mail:578708965@qq.com 
    个人公众号:数据小魔方(datamofang) 
    团队公众号:EasyCharts 
    qq交流群:[魔方学院]553270834

  • 相关阅读:
    Python注释及变量
    MySQL期末测试
    SQL查询第三次训练(重点关照对象)
    MySQL内置函数
    聚类-kmeans
    《达.芬奇密码》丹-布朗
    皮克定理与证明
    常见设计模式的种类与一些原则
    时间序列(二)分解、各部分计算方法
    ADF检验
  • 原文地址:https://www.cnblogs.com/timssd/p/9844361.html
Copyright © 2011-2022 走看看