zoukankan      html  css  js  c++  java
  • 代码一段

    # Scrape the danmaku (bullet comments) of one Bilibili video, keep only the
    # Chinese characters, segment them with jieba, count word frequencies and
    # render the 500 most frequent words as a word cloud shaped by '01.jpg'.
    #
    # Side effects: one HTTP GET, writes 'sign.txt' and 'output.png' in the
    # current directory, reads '01.jpg', and opens a matplotlib window.
    import datetime
    import re

    import jieba
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import requests
    from bs4 import BeautifulSoup
    from wordcloud import ImageColorGenerator, WordCloud

    # --- 1. Download and parse the danmaku XML --------------------------------
    url = 'https://comment.bilibili.com/92542241.xml'
    r = requests.get(url, timeout=10)
    r.raise_for_status()  # fail loudly instead of parsing an error page
    r.encoding = 'utf-8'

    soup = BeautifulSoup(r.text, 'lxml')
    d = soup.find_all('d')  # every <d> element holds one danmaku

    today = datetime.date.today()
    dlst = [{'弹幕': i.text, '网址': url, '时间': today} for i in d]
    # Explicit columns keep df['弹幕'] valid even when no danmaku were found.
    df = pd.DataFrame(dlst, columns=['弹幕', '网址', '时间'])

    # --- 2. Keep only Chinese characters and cache them in sign.txt -----------
    # NOTE(review): the pattern was missing in the original listing; runs of
    # CJK ideographs are the usual filter for this kind of script -- confirm.
    pattern = re.compile(r'[\u4e00-\u9fa5]+')
    with open('sign.txt', 'w', encoding='utf-8') as f:
        for text in df['弹幕'].values:
            f.write(''.join(pattern.findall(text)))

    # --- 3. Segment the text and count word frequencies -----------------------
    with open('sign.txt', 'r', encoding='utf-8') as f:
        data = f.read()
    segment = jieba.lcut(data)
    words_df = pd.DataFrame({'segment': segment})

    # One row per distinct word, most frequent first.
    words_stat = (
        words_df.groupby('segment')
        .size()
        .reset_index(name='计数')
        .sort_values(by='计数', ascending=False)
    )

    top_words = words_stat.head(500)
    word_frequence = dict(zip(top_words['segment'], top_words['计数'].tolist()))
    if not word_frequence:
        # WordCloud cannot draw an empty frequency table.
        raise SystemExit('No Chinese words found in the danmaku; nothing to draw.')

    # --- 4. Render the word cloud ---------------------------------------------
    # Image whose non-white area defines the shape of the cloud.
    color_mask = plt.imread('01.jpg')

    # The original listing left the font path empty. Set this to a TrueType/
    # OpenType font with CJK glyphs, otherwise Chinese words are drawn as empty
    # boxes. An empty value falls back to the library's bundled default font.
    font_path = ""

    cloud = WordCloud(
        font_path=font_path or None,
        background_color="white",
        max_words=3000,
        mask=color_mask,
        max_font_size=200,
        random_state=100,  # fixed seed so the layout is reproducible
        width=1000,
        height=860,
        margin=2,
    )

    cloud.generate_from_frequencies(word_frequence)
    cloud.to_file('output.png')
    plt.imshow(cloud)
    plt.axis('off')
    plt.show()

    今天也不知道写些什么,就敲了这段讲爬虫的代码,回头再看,方便以后取用吧。
  • 相关阅读:
    localhost和127.0.0.1及ip区别
    Linux常用命令大全
    百度搜红包相关代码(1)
    今天开博第一篇,呵呵
    杯具啊,中考
    新年感想
    【转】汇编语言基础
    margin与padding
    .net 中的Literal Label 控件、Literal 控件、Panel 控件和 Placeholder 控件
    HTML 5 中的新元素
  • 原文地址:https://www.cnblogs.com/medigrat/p/11755536.html
Copyright © 2011-2022 走看看