zoukankan      html  css  js  c++  java
  • Python爬虫_selenium

    环境安装

    应用

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from time import sleep

    # Demo: open Baidu in Chrome, type a query into the element with id 'kw',
    # then click the element with id 'su'. The sleeps only slow the steps down
    # so they can be watched in the browser window.
    # Instantiate the browser driver.
    bro = webdriver.Chrome(executable_path='./chromedriver.exe')
    try:
        bro.get('https://www.baidu.com')
        sleep(2)
        # Locate the input element. find_element(By.ID, ...) is used instead of
        # find_element_by_id because the latter was removed in Selenium 4.3.
        tag_input = bro.find_element(By.ID, 'kw')
        tag_input.send_keys('人民币')
        sleep(2)

        btn = bro.find_element(By.ID, 'su')
        btn.click()
        sleep(2)
    finally:
        # Always close the browser, even if a step above raised; otherwise
        # the Chrome/chromedriver processes are left running.
        bro.quit()

    雪球网应用

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from time import sleep

    # Demo: open xueqiu.com, scroll to the bottom of the page several times,
    # click the link found by the XPath below, then print the page source.
    bro = webdriver.Chrome(executable_path='./chromedriver.exe')
    try:
        bro.get('https://xueqiu.com/')
        sleep(5)

        # Execute JavaScript to scroll the window down to the bottom.
        # Repeated four times with a pause after each scroll.
        js = 'window.scrollTo(0,document.body.scrollHeight)'
        for _ in range(4):
            bro.execute_script(js)
            sleep(2)

        # Locate the "load more" button and click it. find_element(By.XPATH, ...)
        # replaces find_element_by_xpath, which was removed in Selenium 4.3.
        a_tag = bro.find_element(By.XPATH, '//*[@id="app"]/div[3]/div/div[1]/div[2]/div[2]/a')
        a_tag.click()
        sleep(5)
        # Print the current (dynamically rendered) page source.
        print(bro.page_source)
    finally:
        # Always close the browser, even if a step above raised.
        bro.quit()

    PhantomJS 是一款无可视化界面的浏览器(免安装);该项目已停止更新,不建议使用。

    from selenium import webdriver
    from time import sleep

    # Demo: same scrolling flow as the Chrome example, but driven through
    # PhantomJS, taking screenshots before and after scrolling.
    # NOTE(review): the path in the published snippet had lost its backslashes
    # ("windowsinphantomjs.exe"); restored here to the presumed
    # windows\bin\phantomjs.exe layout. Confirm against the local install.
    bro = webdriver.PhantomJS(executable_path=r'phantomjs-2.1.1-windows\bin\phantomjs.exe')
    try:
        bro.get('https://xueqiu.com/')
        sleep(2)
        # Take a screenshot.
        bro.save_screenshot('./1.png')
        # Execute JavaScript to scroll the window down to the bottom.
        # Repeated four times with a pause after each scroll.
        js = 'window.scrollTo(0,document.body.scrollHeight)'
        for _ in range(4):
            bro.execute_script(js)
            sleep(2)
        bro.save_screenshot('./2.png')
        # The "load more" click was commented out in the original snippet:
        # a_tag = bro.find_element_by_xpath('//*[@id="app"]/div[3]/div/div[1]/div[2]/div[2]/a')
        # bro.save_screenshot('./2.png')
        # a_tag.click()
        sleep(2)
        # Print the current (dynamically rendered) page source.
        print(bro.page_source)
    finally:
        # Always shut the driver down, even if a step above raised.
        bro.quit()

    谷歌无头浏览器

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from time import sleep
    from selenium.webdriver.chrome.options import Options

    # Demo: run the Baidu search flow in Chrome started with the
    # --headless and --disable-gpu arguments, then print the page source.
    # Create an options object used to start Chrome without a visible window.
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')

    bro = webdriver.Chrome(executable_path='./chromedriver.exe',options=chrome_options)
    try:
        bro.get('https://www.baidu.com')
        sleep(2)
        bro.save_screenshot('1.png')
        # Locate the input element. find_element(By.ID, ...) is used instead of
        # find_element_by_id because the latter was removed in Selenium 4.3.
        tag_input = bro.find_element(By.ID, 'kw')
        tag_input.send_keys('人民币')
        sleep(2)

        btn = bro.find_element(By.ID, 'su')
        btn.click()
        sleep(2)

        print(bro.page_source)
    finally:
        # Always close the browser, even if a step above raised; a leaked
        # headless Chrome has no window and is easy to miss.
        bro.quit()

    动作链

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from time import sleep
    from selenium.webdriver import ActionChains

    # Demo: press and hold the element with id 'draggable' inside the
    # 'iframeResult' frame, then move it right in four 20-pixel steps.
    bro = webdriver.Chrome(executable_path='./chromedriver.exe')
    try:
        url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
        bro.get(url=url)
        # If the target element lives inside an iframe, switch_to.frame must be
        # called before the element can be located.
        bro.switch_to.frame('iframeResult')
        source_tag = bro.find_element(By.ID, 'draggable')
        # Create an action chain object and queue the press-and-hold.
        action = ActionChains(bro)
        action.click_and_hold(source_tag)

        for i in range(4):
            # perform() starts executing the queued actions.
            action.move_by_offset(20,0).perform()
            sleep(1)
        # Release the held mouse button so the drag gesture is completed.
        # A fresh chain is used so only the release is sent.
        ActionChains(bro).release().perform()
    finally:
        # Always close the browser, even if a step above raised.
        bro.quit()
        

     selenium规避被检测识别

    现在不少大型网站都对 selenium 采取了检测机制。例如,正常情况下用浏览器访问淘宝等网站时,window.navigator.webdriver 的值为 undefined;而使用 selenium 访问时,该值为 true。

    只需要设置Chromedriver的启动参数即可解决问题。在启动Chromedriver之前,为Chrome开启实验性功能参数 excludeSwitches,它的值为['enable-automation']

    from selenium.webdriver import Chrome, ChromeOptions

    # Start Chrome with the experimental option 'excludeSwitches' set to
    # ['enable-automation']. Per the surrounding article this is meant to keep
    # sites from detecting Selenium via window.navigator.webdriver.
    option = ChromeOptions()
    option.add_experimental_option(
        'excludeSwitches',
        ['enable-automation'],
    )
    driver = Chrome(options=option)
  • 相关阅读:
    动态控件、控件的生存周期和ViewState的运行细节
    PDA开发初级经验
    编译原理知识总结
    A System for Collecting and Analyzing TopicSpecific Web Information
    show tooltip on control
    慧科新闻、慧科搜索
    内存泄漏
    www.sinobankers.com/forum“今日新帖”“最新会员”“论坛热贴”消失问题
    一堆信息抽取的资料文档
    showing tooltip on controls (description on TTN_NEEDTEXT)
  • 原文地址:https://www.cnblogs.com/z1115230598/p/10987165.html
Copyright © 2011-2022 走看看