zoukankan      html  css  js  c++  java
  • python3 selenium模拟登陆斗鱼提取数据保存数据库

    # coding=utf-8
    from selenium import webdriver
    import json
    import time
    import pymongo


    class Douyu:
        """Scrape the Douyu live-room directory with Selenium and store it in MongoDB.

        Creating an instance launches a Chrome session and opens the directory
        page. ``run()`` then walks the pager, saving each page of room records
        into the ``douyu`` collection of the ``douyu`` database, and releases the
        browser and database connection when it finishes.
        """

        # Class name carried by the pager's "next page" button.
        NEXT_BUTTON_CLASS = "shark-pager-next"
        # NOTE(review): presumably the site adds a "shark-pager-disable*" class to
        # the next button on the last page while leaving it in the DOM -- confirm
        # against the live page. If the marker never appears, this check is a no-op.
        DISABLED_CLASS_MARKER = "shark-pager-disable"

        def __init__(self):
            self.driver = webdriver.Chrome()
            # Request the directory landing page.
            self.driver.get("https://www.douyu.com/directory/all")
            self.host = '127.0.0.1'
            self.port = 27017
            self.DBname = 'douyu'
            # Created lazily on the first save and reused for every later page.
            self._client = None

        def get_content(self):
            """Return a list of dicts, one per room shown on the current page.

            Each dict has the keys ``img``, ``title``, ``category``, ``name`` and
            ``watch_num``. The list is empty when no room entries are found.
            """
            # Fixed pause so the page (or the next page after a click) can render.
            time.sleep(3)
            rooms = self.driver.find_elements_by_xpath(
                '//ul[@id="live-list-contentbox"]/li')
            return [self._parse_room(room) for room in rooms]

        @staticmethod
        def _parse_room(room):
            """Extract the fields of a single room ``<li>`` element into a dict."""
            return {
                # Room cover image URL.
                'img': room.find_element_by_xpath('./a//img').get_attribute("src"),
                # Room title.
                'title': room.find_element_by_xpath('./a').get_attribute("title"),
                # Room category.
                'category': room.find_element_by_xpath(
                    './a/div[@class="mes"]/div/span').text,
                # Streamer name.
                'name': room.find_element_by_xpath(
                    "./a/div[@class='mes']/p/span[1]").text,
                # Viewer count, kept as the text shown on the page.
                'watch_num': room.find_element_by_xpath(
                    "./a/div[@class='mes']/p/span[2]").text,
            }

        def save_content(self, contents):
            """Insert a page of room dicts into MongoDB.

            Does nothing when ``contents`` is empty, because a bulk insert of an
            empty list is rejected by PyMongo. The target collection is also
            exposed as ``self.post``.
            """
            if not contents:
                return
            if self._client is None:
                # One shared client instead of a new connection per page.
                self._client = pymongo.MongoClient(host=self.host, port=self.port)
            # Database and collection share the same name.
            self.post = self._client[self.DBname][self.DBname]
            # ``Collection.insert`` is deprecated and gone in PyMongo 4.
            self.post.insert_many(contents)

        def _go_to_next_page(self):
            """Click the pager's next button; return False when there is no next page."""
            # find_elements returns an empty list when nothing matches, whereas
            # find_element raises NoSuchElementException.
            buttons = self.driver.find_elements_by_class_name(self.NEXT_BUTTON_CLASS)
            if not buttons:
                return False
            button = buttons[0]
            if self.DISABLED_CLASS_MARKER in (button.get_attribute("class") or ""):
                return False
            button.click()
            return True

        def close(self):
            """Quit the browser and close the MongoDB client, if one was opened."""
            try:
                self.driver.quit()
            finally:
                if self._client is not None:
                    self._client.close()
                    self._client = None

        def run(self):
            """Scrape and save every directory page, then release all resources."""
            try:
                # The first page was already requested in __init__.
                self.save_content(self.get_content())
                # Keep paging until the next button is missing or disabled.
                while self._go_to_next_page():
                    self.save_content(self.get_content())
            finally:
                self.close()

    # Script entry point: start the browser session and scrape every page.
    if __name__ == '__main__':
        douyu = Douyu()
        douyu.run()

  • 相关阅读:
    AppDelegate生命周期详解
    Python基础数据类型
    Linux入门基础
    mac命令节选
    如何在mac下安装android sdk
    Uiatomator2 初步探索
    Uiautumator2学习,Gradle部分
    软件测试基础方法总结
    monkey测试小结
    链表中环的入口结点
  • 原文地址:https://www.cnblogs.com/x-pyue/p/7798899.html
Copyright © 2011-2022 走看看