zoukankan      html  css  js  c++  java
  • 【作业】jieba中文词云

      涉及到的数据库代码

    class JobPositionDao(BaseDao):
        """Data-access object for job-position queries.

        Relies on ``execute``, ``commit`` and ``fetch`` being provided by
        ``BaseDao`` (defined elsewhere; its exact behaviour is not visible here).
        """

        def __init__(self):
            super().__init__()

        def findPythonDetail(self):
            """Query the descriptions of all positions collected by task 5.

            Task id 5 is presumably the Python crawl task -- TODO confirm
            against the ``job_collect_task`` table.

            Returns:
                Whatever ``self.fetch()`` returns for the executed query;
                callers index each row with ``[0]`` to get the description.
            """
            # The original FROM clause also listed `job_collect_task` without
            # any join condition, producing a Cartesian product: every
            # description was repeated once per task row (and nothing was
            # returned when that table was empty). The table is not needed to
            # filter by task id, so it is left out.
            # NOTE(review): `detail_desciption` is spelled as in the original
            # query; presumably it matches the real column name -- do not
            # "correct" it without checking the schema.
            sql = (
                "SELECT jd.detail_desciption "
                "FROM `job_position_detail` as jd,`job_position` as jp "
                "WHERE jd.detail_positionid=jp.job_id and jp.job_taskid=5;"
            )
            self.execute(sql, params=None)
            # Kept from the original; presumably ends the read transaction so
            # later queries see fresh data -- confirm against BaseDao.
            self.commit()
            return self.fetch()

      绘制代码

    import jieba
    from day022.spiderproject.spiderproject.dao.jobpositiondao import JobPositionDao
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud,ImageColorGenerator
    import numpy as np
    from PIL import Image
    from functools import reduce
    # Fetch the job descriptions; each returned row carries the description in
    # its first column. Empty / NULL descriptions are skipped so the text file
    # write below cannot fail on a None entry.
    jp = JobPositionDao()
    txtList = [row[0] for row in (jp.findPythonDetail() or []) if row[0]]
    print(txtList)

    # Dump the raw text to disk. Descriptions are separated by a newline:
    # writelines() adds no separators, which fused the last word of one
    # description with the first word of the next before segmentation.
    with open("PythonDetail.txt","w",encoding="utf8") as fp:
        fp.write("\n".join(txtList))

    with open("PythonDetail.txt","r",encoding="utf8") as fp:
        txtStr = fp.read()

    # Stop words are stored comma-separated. A set gives O(1) membership tests,
    # and stripping removes stray whitespace / the trailing newline that would
    # otherwise keep an entry from ever matching a token.
    with open("StopWords.txt","r",encoding="utf8") as fp:
        stopWords = {w.strip() for w in fp.read().split(",")}
    stopWords.discard("")

    # Segment the text with jieba and drop the stop words.
    wordList = jieba.lcut(txtStr)
    wordList = [w for w in wordList if w not in stopWords]
    # Join the words with spaces for WordCloud. str.join is linear and, unlike
    # reduce() without an initial value, does not raise TypeError when the
    # list is empty.
    wordsStr = " ".join(wordList)

    # Use the image as the mask that shapes the cloud.
    imgMask = np.array(Image.open("alice_mask.png"))
    # font_path points at a font with CJK glyphs so Chinese words render;
    # the path is Windows-specific.
    wordCloud = WordCloud(max_words=200,mask=imgMask,background_color="white",font_path="C:/Windows/Fonts/SIMYOU.TTF").generate(wordsStr)
    plt.imshow(wordCloud)
    plt.axis('off')
    plt.show()

      结果

  • 相关阅读:
    基数排序
    阅读笔记
    构建之法阅读笔记
    找水王续
    单元测试
    个人总结
    表单整数的校验问题
    HTML 表格的书写方式:
    rgba兼容IE系列
    "position:relative"在IE中的Bug
  • 原文地址:https://www.cnblogs.com/dofstar/p/11489654.html
Copyright © 2011-2022 走看看