zoukankan      html  css  js  c++  java
  • 代码一段

    import datetime
    import re

    import jieba
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import requests
    from bs4 import BeautifulSoup
    from wordcloud import ImageColorGenerator, WordCloud

    # Download the comment XML for one video and parse it.
    url = 'https://comment.bilibili.com/92542241.xml'
    r = requests.get(url, timeout=10)
    r.raise_for_status()  # stop on HTTP errors instead of parsing an error page
    r.encoding = 'utf-8'  # 'utfs' is not a valid codec name

    soup = BeautifulSoup(r.text, 'lxml')
    d = soup.find_all('d')  # every <d> element in the response

    # Turn each parsed <d> element into one record (text, source URL, date)
    # and collect the records in a DataFrame.
    today = datetime.date.today()
    dlst = [
        {'弹幕': i.text, '网址': url, '时间': today}
        for i in d
    ]
    n = len(dlst)  # number of records collected

    # Explicit columns keep df['弹幕'] valid even when no elements were found.
    df = pd.DataFrame(dlst, columns=['弹幕', '网址', '时间'])

    # Keep only runs of CJK characters from each comment and write them,
    # concatenated without separators, to sign.txt.
    # NOTE(review): the source had re.compile() with no argument; the CJK range
    # below is an assumption about the intended filter — confirm.
    pattern = re.compile(r'[\u4e00-\u9fa5]+')
    with open('sign.txt', 'w', encoding='utf-8') as f:
        for text in df['弹幕'].values:
            filter_data = pattern.findall(text)
            f.write(''.join(filter_data))

    # Read the filtered text back and split it into words with jieba.
    with open('sign.txt', 'r', encoding='utf-8') as f:
        data = f.read()
    segment = jieba.lcut(data)
    words_df = pd.DataFrame({'segment': segment})  # one row per token

    # Count how often each word occurs, most frequent first.
    # groupby(...).size() replaces the dict-renaming agg({'计数': np.size}) form,
    # which pandas 1.0+ rejects for a single grouped column.
    word_stat = words_df.groupby('segment').size().reset_index(name='计数')
    word_stat = word_stat.sort_values(by=['计数'], ascending=False)

    # Image used as the word-cloud mask.
    color_mask = plt.imread('01.jpg')

    # NOTE(review): font_path is empty in the source; rendering Chinese text
    # needs a CJK-capable font file here — confirm the intended path.
    font_path = ""
    wordcloud = WordCloud(
        font_path=font_path or None,  # None lets WordCloud pick its default font
        background_color="white",
        max_words=3000,
        mask=color_mask,
        max_font_size=200,
        random_state=100,
        width=1000,
        height=860,
        margin=2,
    )

    # Map word -> count for the first 500 rows of word_stat
    # (each row is [word, count]).
    word_frequence = {word: count for word, count in word_stat.head(500).values}
    word_frequence_dict = dict(word_frequence)

    # Build the cloud from the frequency table, save it, and display it.
    wordcloud.generate_from_frequencies(word_frequence_dict)
    wordcloud.to_file('output.png')
    plt.imshow(wordcloud)
    plt.axis('off')  # hide the axes around the image
    plt.show()

    今天也不知道写些什么,就贴一段爬虫代码(抓取弹幕并生成词云),回头再看,方便以后取用吧。
  • 相关阅读:
    WCF简单使用(分别部署在控制台和IIS上)
    WCF系列教程之WCF服务配置工具
    053547
    053546
    053545
    053544
    053543
    053542
    053541
    053540
  • 原文地址:https://www.cnblogs.com/medigrat/p/11755536.html
Copyright © 2011-2022 走看看