zoukankan      html  css  js  c++  java
  • 爬虫大作业

     # Create (or empty) the lyrics file at the start of the run.  Nothing is
     # written here: opening in 'w+' mode truncates the file, and the with
     # block closes the handle immediately instead of leaving it open.
     with open("C:/Users/Administrator/PycharmProjects/test/test.txt", 'w+', encoding='utf8') as f:
         pass
     2 import jieba
     3 import requests
     4 from bs4 import BeautifulSoup
     5  
     6  
     def songlist(url):
         """Fetch a lyrics index page and process every song linked from it.

         The page at ``url`` is downloaded and parsed.  For each element
         matching the ``.song`` selector except the first, the ``href`` of its
         first ``<a>`` is handed to ``songread``.  Entries that have no anchor
         or no ``href`` are skipped instead of raising.

         :param url: address of the index page.
         :raises requests.RequestException: if the page cannot be fetched,
             including when the 10-second timeout expires.
         """
         # Without a timeout, requests waits indefinitely on a stalled server.
         res = requests.get(url, timeout=10)
         res.encoding = 'UTF-8'
         soup = BeautifulSoup(res.text, 'html.parser')

         # The first '.song' match is deliberately skipped (presumably a
         # header row rather than a song - confirm against the page markup).
         for entry in soup.select('.song')[1:]:
             links = entry.select('a')
             href = links[0].get('href') if links else None
             if not href:
                 continue
             songread(href)
    15  
    16  
    def songread(url):
        """Download one song page and append its lyrics to the lyrics file.

        The text of every element matching the ``.lrcItem`` selector is
        written, in document order and without added separators, to
        ``C:/Users/Administrator/PycharmProjects/test/test.txt``.

        :param url: address of the song page.
        :raises requests.RequestException: if the page cannot be fetched,
            including when the 10-second timeout expires.
        """
        # Without a timeout, requests waits indefinitely on a stalled server.
        res = requests.get(url, timeout=10)
        res.encoding = 'UTF-8'
        soup = BeautifulSoup(res.text, 'html.parser')

        # Append rather than 'w+': a truncating mode wipes the file on every
        # call, so only the last song fetched would remain.  The script empties
        # the file once at startup, so appending still gives a fresh file per
        # run.  The with block guarantees the data is flushed and closed.
        with open("C:/Users/Administrator/PycharmProjects/test/test.txt", 'a', encoding='utf8') as f:
            for item in soup.select('.lrcItem'):
                f.write(item.text)
    26  
    27  
    28  
    from collections import Counter

    # Crawl the artist's lyrics index; each song's lyrics end up in test.txt.
    songlist('http://www.kuwo.cn/geci/a_266986/')

    # Read back everything the crawl wrote.
    with open("C:/Users/Administrator/PycharmProjects/test/test.txt", 'r', encoding='utf8') as f:
        lyrics = f.read()

    # Segment the text once and count every token in a single pass
    # (calling list.count() per distinct token is quadratic).
    wordList = list(jieba.cut(lyrics))
    wordDic = Counter(wordList)

    # most_common() returns (token, count) pairs ordered by descending count.
    sort_word = wordDic.most_common()

    # Slice instead of indexing range(60) so a corpus with fewer than 60
    # distinct tokens does not raise IndexError.
    top_words = sort_word[:60]

    for entry in top_words:
        print(entry)

    # One token per line, ready to paste into a word-cloud generator.
    with open("C:/Users/Administrator/PycharmProjects/test/test1.txt", 'w', encoding='utf8') as fo:
        fo.writelines(word + '\n' for word, _ in top_words)

    将高频词汇写入 test1.txt 以后,打开 http://www.picdata.cn/ ,用在线词云工具生成词云图片。

  • 相关阅读:
    background和background-size
    获取表单的初始值,模拟placeholder属性
    input[type=checkbox]
    background-size
    input的type属性的修改
    选项卡切换
    2016.12.13
    3. 如何封装查询条件与查询结果到map中
    Java 实现网站当前在线用户统计
    sell-- wordPOI
  • 原文地址:https://www.cnblogs.com/Molemole/p/8973497.html
Copyright © 2011-2022 走看看