zoukankan      html  css  js  c++  java
  • 爬虫大作业

    # Truncate (or create) the lyrics file at the start of the run.  The
    # original left this handle open and unused until the name was rebound;
    # the context manager closes it immediately.  "f" remains bound to the
    # (now closed) file object, so the module-level name is still defined.
    with open("C:/Users/ZD/PycharmProjects/test/test.txt", 'w+', encoding='utf8') as f:
        pass
    from collections import Counter

    import jieba
    import requests
    from bs4 import BeautifulSoup


    def songlist(url):
        """Fetch a song index page and hand every linked song to ``songread``.

        The page at *url* is downloaded, decoded as UTF-8 and parsed.  Every
        element matching the CSS selector ``.song`` except the first match is
        inspected; the ``href`` of its first ``<a>`` child is passed to
        ``songread``.  Entries without a link (or with an empty ``href``) are
        skipped instead of raising.

        Args:
            url: Address of the index page to crawl.

        Raises:
            requests.exceptions.RequestException: If the download fails or
                does not answer within the timeout.
        """
        # Without a timeout a stalled server would block the crawl forever.
        res = requests.get(url, timeout=10)
        res.encoding = 'UTF-8'
        soup = BeautifulSoup(res.text, 'html.parser')

        # The first ".song" match is deliberately skipped, as in the original
        # code (presumably a header row -- confirm against the page markup).
        for entry in soup.select('.song')[1:]:
            link = entry.select_one('a')
            href = link.get('href') if link is not None else None
            if not href:
                continue
            songread(href)


    def songread(url):
        """Download one song page and append its lyric lines to the lyrics file.

        The page at *url* is fetched, decoded as UTF-8 and parsed; the text of
        every element matching the CSS selector ``.lrcItem`` is written, in
        document order and without separators, to
        ``C:/Users/ZD/PycharmProjects/test/test.txt``.

        The file is opened in append mode.  The previous ``'w+'`` mode
        truncated the file on every call, so only the last song processed
        was left on disk.  This function never clears the file; truncate it
        beforehand if a fresh file is wanted.

        Args:
            url: Address of the song's lyric page.

        Raises:
            requests.exceptions.RequestException: If the download fails or
                does not answer within the timeout.
            OSError: If the lyrics file cannot be opened for writing.
        """
        # Fetch first so the file is only held open for the actual write.
        res = requests.get(url, timeout=10)
        res.encoding = 'UTF-8'
        soup = BeautifulSoup(res.text, 'html.parser')
        lyric_lines = [item.text for item in soup.select('.lrcItem')]

        # "with" guarantees the data is flushed and the handle closed before
        # anyone reads the file back.
        with open("C:/Users/ZD/PycharmProjects/test/test.txt", 'a', encoding='utf8') as f:
            f.writelines(lyric_lines)


    # Crawl the index page; the lyrics of the linked songs end up in test.txt.
    songlist('http://www.kuwo.cn/geci/a_336/?')

    # Read back everything the crawl wrote.  The text is no longer stored
    # under the name "str", which shadowed the builtin type.
    with open("C:/Users/ZD/PycharmProjects/test/test.txt", 'r', encoding='utf8') as f:
        text = f.read()

    # jieba.cut yields the segmented tokens lazily; materialise them once.
    wordList = list(jieba.cut(text))

    # Count every token in a single pass (the original called list.count()
    # once per distinct token, which is quadratic in the text length).
    wordDic = Counter(wordList)

    # (token, count) pairs ordered from most to least frequent.
    sort_word = wordDic.most_common()

    # Slicing copes with texts that contain fewer than 60 distinct tokens,
    # where indexing range(60) raised IndexError.
    top_words = sort_word[:60]
    for entry in top_words:
        print(entry)

    # Write the top tokens separated (and terminated) by a single space.
    with open("C:/Users/ZD/PycharmProjects/test/test1.txt", 'w', encoding='utf8') as fo:
        fo.write(''.join(word + ' ' for word, _ in top_words))
  • 相关阅读:
    Memcache第一篇---基础教程
    【分享】一位技术人员成长历程
    simhash
    Cookie mapping技术
    压缩编码基础知识
    shell vim--处理二进制文本
    CSDN泄漏数据完整分析
    day17 isinstance type issubclass 反射
    day16 类之间的关系 特殊成员
    day14 面向对象
  • 原文地址:https://www.cnblogs.com/XiaoFengLuo/p/9077183.html
Copyright © 2011-2022 走看看