zoukankan      html  css  js  c++  java
  • [R] Draw a wordcloud

    # 加载rJava、Rwordseg库  
    library(rJava);  
    library(Rwordseg);  
    library(RColorBrewer);
      
    # == Read the data
    # The path uses a forward slash: inside an R string literal "\t" is the
    # tab character, so "G:\test.txt" would not name the intended file.
    lecture <- read.csv("G:/test.txt", sep = ",", header = TRUE,
                        fileEncoding = "UTF-8")
    # Look at the first rows to spot character-encoding problems early.
    head(lecture)
    # Number of records (rows) in the data set.
    n <- nrow(lecture)
    print(n)
      
    # == Text preprocessing
    # Flatten the data frame into a character vector of its cells, dropping
    # cells that are missing or consist of a single blank.
    res <- lecture[!is.na(lecture) & lecture != " "]
    # Strip URLs. "/" and "." need no escaping inside a character class, and
    # "\/" or "\." are not valid escapes in an R string (they stop the script
    # from parsing). The optional "s" also covers https links.
    res <- gsub(pattern = "https?:[a-zA-Z/.0-9]+", replacement = "", x = res)
    # Strip stop words. They are joined with "|" as alternatives rather than
    # wrapped in "[...]": a bracket expression matches single characters, so it
    # would delete every "创", "业", "宁" and "|" instead of the listed words.
    # Multi-character words come first so they are removed as a whole.
    stop_words <- c("创业", "宁阳", "我", "你", "的", "了", "是", "和", "阳")
    res <- gsub(pattern = paste(stop_words, collapse = "|"),
                replacement = "", x = res)
      
    # == Word segmentation + frequency count
    # Segment every text with Rwordseg::segmentCN and pool the tokens.
    words <- unlist(lapply(X = res, FUN = segmentCN))
    # Split any token that still contains a space; as.character() keeps this
    # step valid when there are no tokens at all.
    words <- unlist(strsplit(as.character(words), " "))
    v <- table(words)
    # Sort by descending frequency.
    v <- rev(sort(v))
    # Store the counts as a plain vector. Passing the table itself makes
    # data.frame() expand it into the columns freq.Var1 / freq.Freq, after
    # which d$freq is NULL and the filter below discards every row.
    d <- data.frame(word = as.character(names(v)), freq = as.vector(v),
                    stringsAsFactors = FALSE)
    # Drop single-character words and words that occur fewer than 10 times.
    d <- subset(d, nchar(word) > 1 & freq >= 10)
    
    # == Draw the word cloud
    # library() stops with an error when a package is missing, whereas
    # require() only returns FALSE and lets the script fail later.
    library(wordcloud)
    library(RColorBrewer)
    # Light-yellow background and the "Kai" font family for the Chinese glyphs.
    # The family is a graphics parameter: text() cannot be called without
    # coordinates, so text(family = "Kai") raised an error.
    # NOTE(review): "Kai" must be a font family known to the active graphics
    # device - confirm on the target platform.
    op <- par(bg = "lightyellow", family = "Kai")
    # Most frequent words are placed first (random.order = FALSE) and coloured
    # with the 8-colour "Dark2" palette.
    wordcloud(d$word, d$freq, scale = c(5, 0.5), random.order = FALSE,
              colors = brewer.pal(8, "Dark2"), use.r.layout = FALSE)
    # Restore the previous graphics parameters.
    par(op)
  • 相关阅读:
    2879. [NOI2012]美食节【费用流】
    luogu P1012 拼数
    luogu cover
    luogu cogs . [NOIP2003] 传染病控制 WA(1/2)
    luogu P1340 兽径管理 WA
    luogu P1342 请柬
    HTML学习笔记二
    HTML学习笔记一
    arr.sort()
    编写函数实现随机产生指定范围的整数的功能
  • 原文地址:https://www.cnblogs.com/Answer1215/p/4509065.html
Copyright © 2011-2022 走看看