zoukankan      html  css  js  c++  java
  • selenium处理动态加载数据

    selenium

    谷歌浏览器驱动下载

    查看驱动和浏览器版本映射关系

    概念:用来完成浏览器自动化相关的操作。可以通过代码的形式制定一些基于浏览器自动化的相关操作(行为动作),当代码执行后,浏览器就会自动触发相关的事件
    环境安装:
      pip install selenium
      下载对应浏览器的驱动程序
    编码流程:
      导包:from selenium import webdriver
      实例化某一款浏览器对象
      制定相关的行为动作

    访问百度

    # Demo: open Baidu, type a query into the search box and submit it.
    from time import sleep

    from selenium import webdriver
    from selenium.webdriver.common.by import By

    # Start Chrome through the chromedriver binary in the current directory.
    # NOTE(review): executable_path is the Selenium 3 way of locating the
    # driver; confirm it is still accepted by the installed Selenium version.
    bro = webdriver.Chrome(executable_path='./chromedriver.exe')
    try:
        bro.get('https://www.baidu.com')
        sleep(2)

        # Locate the search input by its id and type the query.
        search_box = bro.find_element(By.ID, 'kw')
        search_box.send_keys('人民币')
        sleep(2)

        # Locate the search button by its id and click it.
        search_button = bro.find_element(By.ID, 'su')
        search_button.click()
        sleep(2)
    finally:
        # Always close the browser, even if a lookup above raised; otherwise
        # the browser and driver processes are left running.
        bro.quit()

    滑动

    # Demo: scroll a dynamically loading page, click "load more", then dump
    # the rendered page source.
    from time import sleep

    from selenium import webdriver
    from selenium.webdriver.common.by import By

    bro = webdriver.Chrome(executable_path='./chromedriver.exe')
    try:
        bro.get('https://xueqiu.com/')
        sleep(2)

        # Scroll to the bottom by running JavaScript in the page.
        # document.body.scrollHeight is the full height of the document body
        # (not the screen height), so this jumps to the end of the page.
        js = 'window.scrollTo(0,document.body.scrollHeight)'
        for _ in range(4):
            bro.execute_script(js)
            sleep(2)

        # Locate the "load more" link and click it.
        load_more = bro.find_element(
            By.XPATH, '//*[@id="app"]/div[3]/div/div[1]/div[2]/div[2]/a'
        )
        load_more.click()
        sleep(5)

        # page_source holds the page as currently rendered in the browser,
        # including the dynamically loaded content.
        print(bro.page_source)
    finally:
        # Always close the browser, even if a step above raised.
        bro.quit()

    PhantomJs

    #PhantomJs是一款无可视化界面的浏览器(免安装)
    from selenium import webdriver
    from time import sleep
    bro = webdriver.PhantomJS(executable_path=r'C:UsersAdministratorDesktop爬虫+数据爬虫day03phantomjs-2.1.1-windowsinphantomjs.exe')
    
    bro.get('https://xueqiu.com/')
    sleep(2)
    bro.save_screenshot('./1.png')
    #执行js实现滚轮向下滑动
    js = 'window.scrollTo(0,document.body.scrollHeight)'
    bro.execute_script(js)
    sleep(2)
    bro.execute_script(js)
    sleep(2)
    bro.execute_script(js)
    sleep(2)
    bro.execute_script(js)
    sleep(2)
    bro.save_screenshot('./2.png')
    # a_tag = bro.find_element_by_xpath('//*[@id="app"]/div[3]/div/div[1]/div[2]/div[2]/a')
    # bro.save_screenshot('./2.png')
    # a_tag.click()
    sleep(2)
    #获取当前浏览器页面数据(动态)
    print(bro.page_source)
    
    bro.quit()

    谷歌无头浏览器

    # Demo: run the Baidu search in headless Chrome (no visible window).
    from time import sleep

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By

    # Options object that makes Chrome start without a UI.
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')

    bro = webdriver.Chrome(
        executable_path='./chromedriver.exe', options=chrome_options
    )
    try:
        bro.get('https://www.baidu.com')
        sleep(2)
        bro.save_screenshot('1.png')

        # Locate the search input by its id and type the query.
        search_box = bro.find_element(By.ID, 'kw')
        search_box.send_keys('人民币')
        sleep(2)

        # Locate the search button by its id and click it.
        search_button = bro.find_element(By.ID, 'su')
        search_button.click()
        sleep(2)

        print(bro.page_source)
    finally:
        # Always close the browser; a leaked headless process has no window
        # to close by hand.
        bro.quit()

    前进和后退

    # Demo: browser history navigation (back and forward).
    from time import sleep

    from selenium import webdriver

    bro = webdriver.Chrome(executable_path='./chromedriver.exe')
    try:
        # Visit three pages in turn to build up a history.
        for page_url in (
            'https://www.baidu.com',
            'http://www.goubanjia.com/',
            'https://www.taobao.com',
        ):
            bro.get(page_url)
            sleep(1)

        # Step one entry back in the history, then forward again.
        bro.back()
        sleep(1)
        bro.forward()
        sleep(1)
        print(bro.page_source)
    finally:
        # Always close the browser, even if a navigation above raised.
        bro.quit()

    动作链一

    # Demo: action chain that presses and holds an element, then moves the
    # mouse in small steps.
    from time import sleep

    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By

    bro = webdriver.Chrome(executable_path='./chromedriver.exe')
    try:
        url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
        bro.get(url=url)

        # The target element lives inside an iframe, so switch into that
        # frame before trying to locate it.
        bro.switch_to.frame('iframeResult')
        source_tag = bro.find_element(By.ID, 'draggable')

        # Build an action chain and queue a press-and-hold on the element.
        action = ActionChains(bro)
        action.click_and_hold(source_tag)

        for _ in range(4):
            # perform() starts executing the action chain.
            action.move_by_offset(20, 0).perform()
            sleep(1)
    finally:
        # Always close the browser, even if a step above raised.
        bro.quit()

    动作链二

    # Demo: action chain that drags one element onto another, with Chrome
    # configured to be less easily detected as automated.
    from time import sleep

    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver import ChromeOptions
    from selenium.webdriver.common.by import By

    # Keep Selenium from being detected: exclude the 'enable-automation'
    # switch from the switches Chrome is started with.
    option = ChromeOptions()
    option.add_experimental_option('excludeSwitches', ['enable-automation'])

    bro = webdriver.Chrome(executable_path='./chromedriver.exe', options=option)
    url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
    bro.get(url=url)

    # The elements live inside an iframe, so switch into that frame before
    # trying to locate them.
    bro.switch_to.frame('iframeResult')
    source_tag = bro.find_element(By.ID, 'draggable')
    target_tag = bro.find_element(By.ID, 'droppable')

    # Build an action chain, queue the drag-and-drop, then execute it.
    action = ActionChains(bro)
    action.drag_and_drop(source_tag, target_tag)
    action.perform()
    sleep(3)

    # Left disabled as in the original (presumably so the result can be
    # inspected); enable it to close the browser when done.
    # bro.quit()
  • 相关阅读:
    Aizu 0525 Osenbei 搜索 A
    PAT 1088 三人行 模拟,坑 C
    POJ1862 Stripies 贪心 B
    ZOJ 4109 Welcome Party 并查集+优先队列+bfs
    POJ 3685 Matrix
    POJ 3579 Median 二分加判断
    Educational Codeforces Round 63 D. Beautiful Array
    Codeforces Round #553 (Div. 2) C
    HDU 5289
    Codeforces 552 E. Two Teams
  • 原文地址:https://www.cnblogs.com/wanglan/p/10816469.html
Copyright © 2011-2022 走看看