zoukankan      html  css  js  c++  java
  • python3 selenium模拟登陆斗鱼提取数据保存数据库

    # coding=utf-8
    from selenium import webdriver
    import json
    import time
    import pymongo


    class Douyu:
        """Crawl the Douyu "all rooms" directory with Selenium and store it in MongoDB.

        Each directory page is scraped into a list of dicts (one per room) and
        written to the collection ``<db_name>.<db_name>``. Paging is done by
        clicking the "next page" button until it can no longer be found.
        """

        # Locator strategy strings. These are the values of selenium's
        # ``By.XPATH`` / ``By.CLASS_NAME``; using the generic
        # ``find_element(s)(by, value)`` API keeps the crawler working on both
        # old Selenium releases and Selenium 4.3+, where the
        # ``find_element_by_*`` helpers were removed.
        _BY_XPATH = "xpath"
        _BY_CLASS_NAME = "class name"
        _NEXT_PAGE_CLASS = "shark-pager-next"

        def __init__(self, host='127.0.0.1', port=27017, db_name='douyu', page_wait=3):
            """Start Chrome, open the directory page and remember the MongoDB settings.

            Args:
                host: MongoDB host name or address.
                port: MongoDB port.
                db_name: Name used for both the database and the collection.
                page_wait: Seconds to sleep before scraping a page, giving the
                    room list time to render.
            """
            self.driver = webdriver.Chrome()
            # Request the directory landing page.
            self.driver.get("https://www.douyu.com/directory/all")
            self.host = host
            self.port = port
            self.DBname = db_name
            self.page_wait = page_wait
            # The MongoDB connection is opened lazily on the first save and
            # then reused for every following page.
            self.client = None
            self.post = None

        def get_content(self):
            """Scrape the rooms shown on the current page.

            Returns:
                A list of dicts with the keys ``img``, ``title``, ``category``,
                ``name`` and ``watch_num``; empty when no room is listed.
            """
            # Crude wait for the page's room list to finish loading.
            time.sleep(self.page_wait)
            rooms = self.driver.find_elements(
                self._BY_XPATH, '//ul[@id="live-list-contentbox"]/li')

            by_xpath = self._BY_XPATH
            contents = []
            for room in rooms:
                contents.append({
                    # Room cover image URL.
                    'img': room.find_element(by_xpath, './a//img').get_attribute("src"),
                    # Room title.
                    'title': room.find_element(by_xpath, './a').get_attribute("title"),
                    # Room category.
                    'category': room.find_element(
                        by_xpath, './a/div[@class="mes"]/div/span').text,
                    # Streamer name.
                    'name': room.find_element(
                        by_xpath, "./a/div[@class='mes']/p/span[1]").text,
                    # Viewer count, kept as the text shown on the page.
                    'watch_num': room.find_element(
                        by_xpath, "./a/div[@class='mes']/p/span[2]").text,
                })
            return contents

        def save_content(self, contents):
            """Insert one page of room dicts into MongoDB.

            Args:
                contents: List of room dicts as returned by ``get_content``.
                    An empty list is skipped, because MongoDB rejects empty
                    bulk inserts.
            """
            if not contents:
                return
            # Compare with None explicitly: PyMongo collections do not support
            # truth-value testing.
            if self.post is None:
                self.client = pymongo.MongoClient(host=self.host, port=self.port)
                # Database and collection share the same name.
                self.post = self.client[self.DBname][self.DBname]
            # ``insert`` is deprecated/removed in PyMongo; ``insert_many`` is
            # the supported bulk API.
            self.post.insert_many(contents)

        def close(self):
            """Release the MongoDB connection (if one was opened) and quit the browser."""
            if self.client is not None:
                self.client.close()
                self.client = None
                self.post = None
            self.driver.quit()

        def run(self):
            """Scrape and store every directory page, then release all resources."""
            try:
                # The landing page was already requested in __init__.
                self.save_content(self.get_content())
                while True:
                    # ``find_elements`` returns an empty list when nothing
                    # matches, so a missing button ends the loop instead of
                    # raising NoSuchElementException.
                    # NOTE(review): if the site keeps the "shark-pager-next"
                    # class on a disabled button on the last page, this loop
                    # will not terminate - confirm against the live page.
                    next_buttons = self.driver.find_elements(
                        self._BY_CLASS_NAME, self._NEXT_PAGE_CLASS)
                    if not next_buttons:
                        break
                    next_buttons[0].click()
                    self.save_content(self.get_content())
            finally:
                self.close()

    # Script entry point: build the crawler and walk every directory page.
    if __name__ == '__main__':
        douyu = Douyu()
        douyu.run()

  • 相关阅读:
    克如斯卡尔 P1546
    真正的spfa
    第四课 最小生成树 要点
    关于vscode中nullptr未定义
    cmake学习笔记
    python学习笔记
    (BFS 图的遍历) 2906. kotori和迷宫
    (图论基础题) leetcode 997. Find the Town Judge
    (BFS DFS 并查集) leetcode 547. Friend Circles
    (BFS DFS 图的遍历) leetcode 841. Keys and Rooms
  • 原文地址:https://www.cnblogs.com/x-pyue/p/7798899.html
Copyright © 2011-2022 走看看