zoukankan      html  css  js  c++  java
  • [R] Draw a wordcloud

    # Load the rJava, Rwordseg and RColorBrewer libraries
    library(rJava);  
    library(Rwordseg);  
    library(RColorBrewer);
      
    # == Read the data
    # Use forward slashes in the Windows path: inside an R string "\t" is a TAB
    # escape, so "G:\test.txt" would not name the intended file.
    lecture <- read.csv("G:/test.txt", sep = ",", header = TRUE,
                        fileEncoding = "UTF-8")
    # Look at the first rows to check for character-encoding problems
    head(lecture)
    # Number of rows in the data set
    n <- nrow(lecture)
    print(n)
      
    # == Text preprocessing
    # Keep every cell that is not a single space; indexing the data frame with
    # a logical matrix returns the selected cells as a plain vector.
    res <- lecture[lecture != " "]
    # Strip URLs. "/" and "." are literal inside a bracket expression, so they
    # need no backslash ("\/" and "\." are not valid escapes in an R string and
    # stop the script from parsing).
    res <- gsub(pattern = "http:[a-zA-Z/.0-9]+", replacement = "", x = res)
    # Strip stop words. Alternation removes the listed words as whole units; a
    # bracket class would instead delete each single character it contains
    # (including "|" and the individual characters of the two-character words).
    res <- gsub(pattern = "我|你|的|了|是|和|阳|创业|宁阳", replacement = "",
                x = res)
      
    # == Word segmentation + frequency count
    # Segment every text entry with segmentCN and flatten the results
    words <- unlist(lapply(X = res, FUN = segmentCN))
    # Split each segment on spaces in case a segment still holds several tokens
    word <- lapply(X = words, FUN = strsplit, " ")
    v <- table(unlist(word))
    # Sort by descending frequency
    v <- sort(v, decreasing = TRUE)
    # as.vector() drops the table class. Passing the table itself would expand
    # into two columns (freq.Var1 / freq.Freq), leaving d$freq undefined.
    d <- data.frame(word = names(v), freq = as.vector(v))
    # Keep words longer than one character that occur at least 10 times
    d <- subset(d, nchar(as.character(word)) > 1 & freq >= 10)
    
    # == Draw the word cloud
    # library() fails loudly if the package is missing; require() only
    # returns FALSE and lets the script continue to a confusing later error.
    library(wordcloud)
    library(RColorBrewer)
    # Light-yellow background plus the "Kai" font family. The family is set via
    # par() because text() cannot be called without coordinates.
    # NOTE(review): "Kai" must be a font family known to the active graphics
    # device (needed to render Chinese glyphs) -- confirm on the target machine.
    op <- par(bg = "lightyellow", family = "Kai")
    # Plots every row of d, as before. The earlier top-50 subset and rainbow
    # palette were computed but never used, so they have been removed.
    wordcloud(d$word, d$freq, scale = c(5, 0.5), random.order = FALSE,
              colors = brewer.pal(8, "Dark2"), use.r.layout = FALSE)
    # Restore the previous graphics parameters
    par(op)
  • 相关阅读:
    滴滴日送400万红包,仅仅为人群不冷漠?
    C++提供的四种新式转换--const_cast dynamic_cast reinterpret_cast static_cast
    GreenDao开源ORM框架浅析
    Python 计数器
    Linux虚拟内存的添加
    Linux iptables
    Python set
    Python dict get items pop update
    Python contains
    Python reverse
  • 原文地址:https://www.cnblogs.com/Answer1215/p/4509065.html
Copyright © 2011-2022 走看看