zoukankan      html  css  js  c++  java
  • 爬取虎牙直播同一时间段的所有直播间信息

    import requests
    from openpyxl import Workbook
    from bs4 import BeautifulSoup
    from tqdm import tqdm


    class LiveSpider():
        """Crawl Huya live-room listings and save them as Excel workbooks.

        One workbook named ``huya_live_<gameId>.xlsx`` is written to the current
        working directory for every game id that is probed.
        """

        _GAME_INDEX_URL = "https://www.huya.com/g"
        _LIVE_LIST_URL = "https://www.huya.com/cache.php"
        _SHEET_HEADER = ["直播类型", "直播房间名", "房间号", "主播昵称", "直播介绍", "人流量"]
        # Keys read from each live-room record, in the same order as the header.
        _LIVE_FIELDS = ("gameFullName", "roomName", "uid", "nick",
                        "introduction", "totalCount")

        def super_spider(self, timeout=30):
            """Fetch every page of every game's live list and write it to disk.

            Args:
                timeout: Seconds to wait on each HTTP request before
                    ``requests`` raises, so a stalled connection cannot hang
                    the crawl indefinitely.

            Raises:
                requests.RequestException: If an HTTP request fails or times out.
                KeyError, ValueError: If a response does not carry the expected
                    JSON structure (``data.totalPage`` / ``data.datas`` and the
                    per-room fields).
            """
            headers = {
                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                              "AppleWebKit/537.36 (KHTML, like Gecko) "
                              "Chrome/85.0.4183.102 Safari/537.36",
                "x-requested-with": "XMLHttpRequest"
            }
            # The index page is requested without the custom headers, exactly as
            # before; only the timeout is new.
            index_html = requests.get(self._GAME_INDEX_URL, timeout=timeout).text
            soup = BeautifulSoup(index_html, "html.parser")
            # NOTE(review): the number of <li> elements is used as the upper
            # bound of the game ids to probe (1 .. count-1) -- confirm that this
            # matches the ids the site actually uses.
            game_id_limit = len(soup.find_all("li"))

            for game_id in tqdm(range(1, game_id_limit)):
                workbook = Workbook()
                sheet = workbook.active
                sheet.append(self._SHEET_HEADER)

                # Page 1 is needed to learn the page count; keep its payload so
                # it is not downloaded a second time inside the loop.
                first_page = self._fetch_live_page(game_id, 1, headers, timeout)
                total_pages = int(first_page["totalPage"])

                for page in range(1, total_pages + 1):
                    if page == 1:
                        page_data = first_page
                    else:
                        page_data = self._fetch_live_page(
                            game_id, page, headers, timeout)
                    # The JSON body is parsed once per page, not once per row.
                    for live in page_data["datas"]:
                        row = [live[field] for field in self._LIVE_FIELDS]
                        try:
                            sheet.append(row)
                        except Exception as exc:
                            # Best effort: one row the spreadsheet writer
                            # rejects must not abort the crawl, but it should
                            # not vanish silently either. Unlike a bare
                            # ``except``, this lets Ctrl-C through.
                            tqdm.write(
                                f"gameId {game_id}, page {page}: "
                                f"skipped row {row!r} ({exc!r})")

                workbook.save(f"huya_live_{game_id}.xlsx")

        def _fetch_live_page(self, game_id, page, headers, timeout):
            """Return the ``data`` object of one live-list page for ``game_id``."""
            params = {
                "m": "LiveList",
                "do": "getLiveListByPage",
                "gameId": game_id,
                "tagAll": "0",
                "page": str(page)
            }
            response = requests.get(self._LIVE_LIST_URL, params=params,
                                    headers=headers, timeout=timeout)
            return response.json()["data"]


    if __name__ == "__main__":
        # Start the crawl only when this file is executed as a script, so that
        # importing the module does not fire network requests or write files.
        spider = LiveSpider()
        spider.super_spider()
  • 相关阅读:
    react 入坑笔记(五)
    练习
    高级指令
    进阶指令
    基础指令
    VMware 备份操作系统
    Ubuntu 用户的切换
    形态学转换
    图像模糊
    域名拆分 tld
  • 原文地址:https://www.cnblogs.com/liuyuchao/p/14017154.html
Copyright © 2011-2022 走看看