zoukankan      html  css  js  c++  java
  • 005 动态加载实例

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from time import sleep

    # Headless-browser demo: open Baidu in Chrome without a visible window,
    # submit a search and inspect the rendered page source.

    # Options object that makes Chrome start in headless (no UI) mode.
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')

    # Instantiate the driver. The path is a raw string because in a normal
    # literal the sequence '\a' is the BEL control character, which silently
    # corrupts a Windows path.
    # NOTE(review): the path looks like it lost a separator when the post was
    # published (possibly F:\anaconda\chromedriver.exe) - confirm locally.
    bro = webdriver.Chrome(executable_path=r'F:\anacondachromedriver.exe', chrome_options=chrome_options)

    try:
        # Send the request.
        bro.get(url='http://www.baidu.com')

        # Optional: take a screenshot of the loaded page.
        # bro.save_screenshot('first.jpg')

        # Locate the search input box.
        my_input = bro.find_element_by_id('kw')

        # Type the search keyword into the input box.
        my_input.send_keys('美女')

        # Locate the search button and click it.
        my_button = bro.find_element_by_id('su')
        my_button.click()

        # Page source of the page currently displayed by the browser.
        page_text = bro.page_source

        print(type(page_text))
    finally:
        # Always shut down the browser and the driver, even if a step failed.
        bro.quit()
    无头浏览器实现
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from time import sleep

    # Scroll-to-load demo: open a 36kr listing page, scroll to the bottom a few
    # times so the page loads more entries, then save the rendered HTML.

    # Raw string: in a normal literal '\a' is the BEL control character, which
    # silently corrupts a Windows path.
    # NOTE(review): the path looks like it lost a separator when the post was
    # published (possibly F:\anaconda\chromedriver.exe) - confirm locally.
    bro = webdriver.Chrome(r'F:\anacondachromedriver.exe')

    try:
        url = 'https://36kr.com/information/contact'

        bro.get(url=url)

        # Trigger the dynamically loaded data by scrolling to the page bottom.
        js = 'window.scrollTo(0, document.body.scrollHeight)'
        for _ in range(3):
            bro.execute_script(js)
            # Wait after every scroll (including the last one) so the newly
            # requested content has time to arrive before the source is read.
            sleep(2)

        page_text = bro.page_source
        print(page_text)

        with open('./36k.html', 'w', encoding='utf-8') as fp:
            fp.write(page_text)
    finally:
        # quit() ends the whole driver session; close() only closes the
        # current window.
        bro.quit()
    加载滚动条
    import os
    import random
    import re
    from multiprocessing.dummy import Pool

    import requests
    from lxml import etree

    # Thread-pool demo: collect the video URLs from a pearvideo category page,
    # then download and save the videos using a pool of worker threads.

    url = 'https://www.pearvideo.com/category_8'
    headers = {
        # The header name must be spelled 'User-Agent'; the former 'Use-Agent'
        # key meant no browser User-Agent was actually sent.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
    }

    # NOTE(review): verify=False disables TLS certificate verification for this
    # request only (the detail-page requests below verify normally) - confirm
    # whether it is still needed.
    response = requests.get(url=url, headers=headers, verify=False).content.decode()
    xpath_data = etree.HTML(response)
    li_list = xpath_data.xpath('//*[@id="listvideoListUl"]/li')

    # URLs of the video files; this is the work list handed to the thread pool.
    video_url_list = []

    for li in li_list:
        hrefs = li.xpath('.//div[@class="vervideo-bd"]/a/@href')
        if not hrefs:
            # List item without a detail link: nothing to follow.
            continue
        v_href = 'https://www.pearvideo.com/' + hrefs[0]
        d_response = requests.get(url=v_href, headers=headers).content.decode()
        matches = re.findall('srcUrl="(.*?)",', d_response, re.S)
        if not matches:
            # Detail page does not expose a srcUrl: skip instead of crashing.
            continue
        video_url_list.append(matches[0])


    def dowmloadVideo(link):
        """Download *link* and return the response body as bytes."""
        return requests.get(url=link, headers=headers).content


    def save_video(data, index=None):
        """Write the video bytes *data* to ``video/<index>.mp4``.

        When *index* is omitted a random number between 1 and 1000 is used,
        which can collide with an existing file; pass an explicit, unique
        index to avoid overwriting another video.
        """
        if index is None:
            index = random.randint(1, 1000)
        # Make sure the output directory exists before writing into it.
        os.makedirs('video', exist_ok=True)
        video_name = 'video/' + str(index) + '.mp4'
        with open(video_name, 'wb') as fp:
            fp.write(data)


    # Create a pool of 5 worker threads.
    pool = Pool(5)
    try:
        # map returns a list holding the binary data of every downloaded video.
        video_data_list = pool.map(dowmloadVideo, video_url_list)

        # Number the files sequentially so two videos never share a file name.
        pool.starmap(save_video, zip(video_data_list, range(1, len(video_data_list) + 1)))
    finally:
        pool.close()
        pool.join()
    多线程的实现
    from selenium import webdriver
    from time import sleep

    # iframe demo: the QQ-zone login form lives inside an iframe, so the driver
    # has to switch into that frame before it can locate the form elements.

    # Raw string: in a normal literal '\a' is the BEL control character, which
    # silently corrupts a Windows path.
    # NOTE(review): the path looks like it lost a separator when the post was
    # published (possibly F:\anaconda\chromedriver.exe) - confirm locally.
    bro = webdriver.Chrome(r'F:\anacondachromedriver.exe')

    try:
        bro.get('https://qzone.qq.com/')
        sleep(1)

        # Switch into the login iframe.
        bro.switch_to.frame('login_frame')
        user = bro.find_element_by_id('switcher_plogin')
        user.click()

        sleep(3)
        username = bro.find_element_by_id('u')
        username.send_keys('*****')

        sleep(3)
        password = bro.find_element_by_id('p')
        password.send_keys('*****')

        sleep(2)
        login = bro.find_element_by_id('login_button')
        login.click()

        sleep(10)
    finally:
        # Always shut down the browser and the driver, even if a step failed.
        bro.quit()
    iframe的实现
  • 相关阅读:
    Linux(centos)下安装JDK
    springmvc的面试知识点总结
    建造者模式
    PHP原型模式
    PHP适配器模式
    php备忘录模式
    PHP代理模式proxy
    单例模式
    工厂模式
    结构模式
  • 原文地址:https://www.cnblogs.com/abc23/p/10751549.html
Copyright © 2011-2022 走看看