zoukankan      html  css  js  c++  java
  • 爬取熊猫TV,javascript,selenium,模拟点击

    from selenium import webdriver
    import csv
    
    
    def get_pages_numger(browser):
        """Return the total number of listing pages as an int.

        The count is read from the text of the 7th ``<a>`` inside the
        ``page-component`` pagination container.

        Args:
            browser: A Selenium WebDriver with the listing page already loaded.

        Returns:
            The page count parsed from the anchor's text.

        Raises:
            ValueError: If the anchor text is not an integer.
        """
        # The singular find_element_* returns one WebElement; the plural form
        # returns a list, which has no ``.text`` attribute.
        last_page_link = browser.find_element_by_xpath(
            '//div[@class="page-component"]/a[7]')
        return int(last_page_link.text)
        
    def get_next_page_buttun(browser, xpath='//div[@class="page-component"]/a[last()]'):
        """Return the clickable "next page" element of the pagination bar.

        Args:
            browser: A Selenium WebDriver with the listing page already loaded.
            xpath: XPath selecting the "next page" control.
                NOTE(review): the default assumes the "next" control is the
                last ``<a>`` inside the ``page-component`` container -- the
                original code passed no selector at all, so confirm this
                against the live page markup.

        Returns:
            A single WebElement, so the caller can invoke ``.click()`` on it.
        """
        # Singular lookup: the caller clicks the result directly, which would
        # fail on the list returned by find_elements_*.
        return browser.find_element_by_xpath(xpath)
    
    def get_rooms_number_in_a_page(browser):
        """Collect one row per live room shown on the current page.

        Args:
            browser: A Selenium WebDriver with a listing page loaded.

        Returns:
            A list of ``[id, nickname, cate, number]`` lists of strings, one
            per ``<li data-id=...>`` element, where ``number`` is the text of
            the ``video-number`` span (the audience count).
        """
        rooms = []
        for li in browser.find_elements_by_xpath('//li[@data-id]'):
            room_id = li.get_attribute("data-id")
            # The leading "." makes each query relative to this <li>; a bare
            # "//" searches the whole document and would return the first
            # room's span for every row.
            nickname = li.find_element_by_xpath('.//span[@class="video-nickname"]').text
            number = li.find_element_by_xpath('.//span[@class="video-number"]').text  # audience count
            cate = li.find_element_by_xpath('.//span[@class="video-cate"]').text
            # Store the text, not the WebElement objects, so the rows can be
            # written to CSV and remain usable after the browser is closed.
            rooms.append([room_id, nickname, cate, number])
        return rooms
    
    def get_rooms_number_in_all_pages(browser, pages_number):
        """Scrape every listing page and return the combined room rows.

        Starts from the page currently loaded in ``browser`` and advances by
        clicking the "next page" control between pages. The browser window is
        closed before returning, including when scraping fails.

        Args:
            browser: A Selenium WebDriver positioned on the first listing page.
            pages_number: Total number of pages to scrape.

        Returns:
            The concatenated rows from all pages, in page order.
        """
        res = []
        try:
            for i in range(pages_number):
                print('第{}页'.format(i+1))
                # Scrape the page currently displayed.
                res.extend(get_rooms_number_in_a_page(browser))

                # Advance to the next page; there is nothing to click after
                # the last page has been scraped.
                if i < pages_number - 1:
                    next_page_button = get_next_page_buttun(browser)
                    next_page_button.click()
                    # NOTE(review): no explicit wait follows the click; if the
                    # list is refreshed asynchronously, the next iteration may
                    # read stale content -- confirm against the live site.
        finally:
            # Always release the browser window, even if scraping raised.
            browser.close()

        return res
        
        
    def save_to_csv(rooms_number):
        """Write the scraped rows to ``live_rooms_number.csv``.

        The file is created (or overwritten) in the current working directory
        with a header row followed by one line per room.

        Args:
            rooms_number: Iterable of ``[id, nickname, cate, number]`` rows.
        """
        # newline='' lets the csv module control line endings itself, as the
        # csv documentation requires for files passed to csv.writer.
        with open('live_rooms_number.csv', 'w', newline='') as f:
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow(['id','nickname','cate', 'number']) # header row
            writer.writerows(rooms_number)
            
                
    def read_from_csv():
        """Load the saved rows from ``live_rooms_number.csv`` without the header.

        Returns:
            A list of rows (each a list of strings); empty if the file holds
            only a header or nothing at all.
        """
        with open('live_rooms_number.csv', 'r') as csv_file:
            rows = csv.reader(csv_file)
            next(rows, None)  # discard the header row, if there is one
            return list(rows)
            
    
        
        
    def get_rooms_number():
        """Open Firefox, load the Panda TV listing and scrape every page.

        Returns:
            The rows returned by ``get_rooms_number_in_all_pages`` for all
            listing pages.

        Raises:
            AssertionError: If the loaded page's title does not contain
                '熊猫TV'.
        """
        browser = webdriver.Firefox()
        try:
            browser.get('http://www.panda.tv/all')
            # Raised explicitly instead of using ``assert`` so the check is
            # not stripped when Python runs with -O.
            if '熊猫TV' not in browser.title:
                raise AssertionError(
                    'unexpected page title: {!r}'.format(browser.title))

            pages_number = get_pages_numger(browser)
        except Exception:
            # Do not leave a Firefox instance running if setup fails.
            browser.quit()
            raise

        # From here on get_rooms_number_in_all_pages owns the browser: it
        # calls browser.close() itself once it has finished.
        all_live_rooms_number = get_rooms_number_in_all_pages(browser, pages_number)

        return all_live_rooms_number
        
        
        
    if __name__ == '__main__':
        # Scrape all live rooms, then persist them to the CSV file.
        rooms_number = get_rooms_number()
        save_to_csv(rooms_number)

        # To reload previously saved rows instead of scraping again:
        #rooms_number = read_from_csv()
        
        
        
    
    
    
  • 相关阅读:
    JSLint报错翻译
    vue 选城市三级联动
    npm 安装 sass-loader 失败的解决办法
    Metasploit Framework(6)客户端渗透(上)
    Metasploit Framework(5)弱点扫描
    Metasploit Framework(4)信息收集
    Metasploit Framework(3)Meterpreter
    Metasploit Framework(2)Exploit模块、Payload使用
    Metasploit Framework(1)基本命令、简单使用
    Kali学习笔记22:缓冲区溢出漏洞利用实验
  • 原文地址:https://www.cnblogs.com/hhh5460/p/5708802.html
Copyright © 2011-2022 走看看