zoukankan      html  css  js  c++  java
  • selenium常用方法

    # from selenium import webdriver
    # from selenium.webdriver import ActionChains
    # from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
    # from selenium.webdriver.common.keys import Keys #键盘按键操作
    # from selenium.webdriver.support import expected_conditions as EC
    # from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素

     一  selenium 是啥玩意

      selenium是一个自动化测试工具,支持多种浏览器。爬虫中主要用来解决JavaScript渲染的问题。

    二 常用方法

      http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webelement

      1 声明浏览器对象

    @property
    def page_source(self):
    """
    Gets the source of the current page.

    :Usage:
    driver.page_source

       注意:page_source返回的并不是浏览器渲染后你看到的实际显示的页面前端代码,而是开发者工具-->Network-->Doc-->Response中的代码。这与在页面上点击"检查"看到的html代码并不一样!!

    drive = webdriver.Chrome()
    drive.get('https://www.baidu.com')
    print(drive.current_url)
    print(drive.get_cookies())
    print(drive.page_source)

      2  查找元素 find_element_by_***

    drive = webdriver.Chrome()
    drive.get('https://www.baidu.com')
    
    input_1 = drive.find_element_by_css_selector('#kw')
    input_2 = drive.find_element_by_id('kw')
    input_3 = drive.find_element_by_xpath('//*[@id="kw"]')
    
    input_4 = drive.find_element(By.ID,'kw')
    print(input_1,input_2,input_3,input_4)
    
    drive.close()

       输出:

      这四种方法实际上找到的是同一个标签

    <selenium.webdriver.remote.webelement.WebElement (session="7a9328c7dbf1b3e962ed3e543dab9570", element="0.6029528256407286-1")> <selenium.webdriver.remote.webelement.WebElement (session="7a9328c7dbf1b3e962ed3e543dab9570", element="0.6029528256407286-1")> <selenium.webdriver.remote.webelement.WebElement (session="7a9328c7dbf1b3e962ed3e543dab9570", element="0.6029528256407286-1")> <selenium.webdriver.remote.webelement.WebElement (session="7a9328c7dbf1b3e962ed3e543dab9570", element="0.6029528256407286-1")>

      3  元素交互操作 

        send_keys()

        def send_keys(self, *value):
            """Simulates typing into the element.

        clear()

        def clear(self):
            """Clears the text if it's a text entry element."""

        submit()

        def submit(self):
            """Submits a form."""

      示例

    drive = webdriver.Chrome()
    drive.get('https://www.baidu.com')
    input = drive.find_element_by_css_selector('#kw')
    input.send_keys('iphone')
    time.sleep(2)
    input.clear()
    input.send_keys('华为')
    submit = drive.find_element(By.ID,'su')
    submit.submit()

       4 交互动作

      http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.action_chains

      ActionChains are a way to automate low level interactions such as mouse movements, mouse button actions, key press, and context menu interactions. This is useful for doing more complex actions like hover over and drag and drop.

    class ActionChains(object):
         def __init__(self, driver):
            """
            Creates a new ActionChains.
         def perform(self):
            """
            Performs all stored actions.
            """

      示例:

    drive = webdriver.Chrome()
    drive.get('https://www.baidu.com')
    actions = ActionChains(drive)
    actions.move_to_element()

      5 执行JavaScript

      execute_script()

        def execute_script(self, script, *args):
            """
            Synchronously Executes JavaScript in the current window/frame.

      示例:

    drive = webdriver.Chrome()
    drive.get('https://www.baidu.com')
    
    drive.execute_script("alert('执行JavaScript')")

      显示:

      

       6 获取元素信息

        1)获取属性

          get_attribute()

        def get_attribute(self, name):
            """Gets the given attribute or property of the element.

        示例:

    drive = webdriver.Chrome()
    drive.get('https://www.jd.com/')
    input = drive.find_element_by_id('key')
    print(input.get_attribute('clstag'))

        输出:

    h|keycount|head|search_a

         2)获取文本值

          text

    @property
        def text(self):
            """The text of the element."""

        示例:

    drive = webdriver.Chrome()
    drive.get('https://www.jd.com/')
    ele = drive.find_element_by_css_selector('#navitems-group1 > li.fore1 > a')
    print(ele.text)

        输出:

    秒杀

        3)获取ID,位置,标签名,大小

          id

     @property
        def id(self):
            """Internal ID used by selenium.

          location   

     @property
        def location(self):
            """The location of the element in the renderable canvas."""

          tag_name  

     @property
        def tag_name(self):
            """This element's ``tagName`` property."""

          size

    @property
        def size(self):
            """The size of the element."""

        示例:

    drive = webdriver.Chrome()
    drive.get('https://www.jd.com/')
    ele = drive.find_element_by_css_selector('#navitems-group1 > li.fore1 > a')
    print(ele.id)
    print(ele.location)
    print(ele.tag_name)
    print(ele.size)

        输出:

    0.8180466912085045-1
    {'x': 245, 'y': 211}
    a
    {'height': 40, 'width': 28}

       7 等待

      http://selenium-python.readthedocs.io/waits.html

        1) 隐式等待

    drive = webdriver.Chrome()
    drive.implicitly_wait(10)
    drive.get('https://www.jd.com/')
    ele = drive.find_element_by_css_selector('#navitems-group1 > li.fore1 > a')
    print(ele.id)

        2) 显式等待

    drive = webdriver.Chrome()
    drive.get('https://www.jd.com/')
    wait = WebDriverWait(drive,20)
    input = wait.until(EC.presence_of_element_located((By.ID,'key')))
    button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME,'button')))
    input.send_keys('iphone')
    button.click()

      8 前进与后退

        back()

        forward()

      示例:

    drive = webdriver.Chrome()
    drive.implicitly_wait(10)
    drive.get('https://www.jd.com/')
    drive.get('https://www.baidu.com')
    drive.get('https://www.taobao.com/')
    drive.back()
    time.sleep(2)
    drive.back(2)
    time.sleep(2)
    drive.forward()

      9 cookies

        get_cookies()

        add_cookie()

        delete_all_cookies()

    drive = webdriver.Chrome()
    drive.implicitly_wait(10)
    drive.get('https://www.baidu.com')
    print(drive.get_cookies())
    drive.add_cookie({'name':'name','value':'123','domin':'www.taobao.com'})
    print(drive.get_cookies())
    drive.delete_all_cookies()
    print(drive.get_cookies())

      输出:

    [{'domain': '.baidu.com', 'httpOnly': False, 'name': 'H_PS_PSSID', 'path': '/', 'secure': False, 'value': '1465_21105_26105'}, {'domain': '.baidu.com', 'expiry': 3671266866.838097, 'httpOnly': False, 'name': 'BAIDUID', 'path': '/', 'secure': False, 'value': '1763F93C00E2F470BFAA4E3FEFD8B537:FG=1'}, {'domain': '.baidu.com', 'expiry': 3671266866.838161, 'httpOnly': False, 'name': 'PSTM', 'path': '/', 'secure': False, 'value': '1523783217'}, {'domain': '.baidu.com', 'expiry': 3671266866.838144, 'httpOnly': False, 'name': 'BIDUPSID', 'path': '/', 'secure': False, 'value': '1763F93C00E2F470BFAA4E3FEFD8B537'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_HOME', 'path': '/', 'secure': False, 'value': '0'}, {'domain': '.baidu.com', 'expiry': 1523869620.35137, 'httpOnly': False, 'name': 'BDORZ', 'path': '/', 'secure': False, 'value': 'B490B5EBF6F3CD402E515D22BCDA1598'}, {'domain': 'www.baidu.com', 'expiry': 1524647220, 'httpOnly': False, 'name': 'BD_UPN', 'path': '/', 'secure': False, 'value': '12314753'}]
    [{'domain': '.baidu.com', 'httpOnly': False, 'name': 'H_PS_PSSID', 'path': '/', 'secure': False, 'value': '1465_21105_26105'}, {'domain': '.baidu.com', 'expiry': 3671266866.838097, 'httpOnly': False, 'name': 'BAIDUID', 'path': '/', 'secure': False, 'value': '1763F93C00E2F470BFAA4E3FEFD8B537:FG=1'}, {'domain': '.baidu.com', 'expiry': 3671266866.838161, 'httpOnly': False, 'name': 'PSTM', 'path': '/', 'secure': False, 'value': '1523783217'}, {'domain': '.baidu.com', 'expiry': 3671266866.838144, 'httpOnly': False, 'name': 'BIDUPSID', 'path': '/', 'secure': False, 'value': '1763F93C00E2F470BFAA4E3FEFD8B537'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_HOME', 'path': '/', 'secure': False, 'value': '0'}, {'domain': '.baidu.com', 'expiry': 1523869620.35137, 'httpOnly': False, 'name': 'BDORZ', 'path': '/', 'secure': False, 'value': 'B490B5EBF6F3CD402E515D22BCDA1598'}, {'domain': 'www.baidu.com', 'expiry': 1524647220, 'httpOnly': False, 'name': 'BD_UPN', 'path': '/', 'secure': False, 'value': '12314753'}, {'domain': 'www.baidu.com', 'expiry': 2154503220, 'httpOnly': False, 'name': 'name', 'path': '/', 'secure': True, 'value': '123'}]
    []

      10 选项卡管理

        window.open()

        window_handles

        示例:

    drive = webdriver.Chrome()
    drive.implicitly_wait(10)
    drive.get('https://www.baidu.com')
    drive.execute_script('window.open()')   #利用js,打开新的选项卡
    print(drive.window_handles)
    drive.switch_to_window(drive.window_handles[1])
    time.sleep(5)
    drive.get('https://www.baidu.com')
    drive.switch_to_window(drive.window_handles[0])
    time.sleep(4)
    drive.get('https://python.org')

      输出:

    ['CDwindow-240B9191715FA7F46E0753DFC1879086', 'CDwindow-499D6E474185626BE3638D9E75ADE6C']

      11 异常处理

      http://selenium-python.readthedocs.io/api.html#module-selenium.common.exceptions

  • 相关阅读:
    Linux systemctl 命令完全指南
    分享一些 Kafka 消费数据的小经验
    大数据日志采集系统
    使用Spring Boot Actuator将指标导出到InfluxDB和Prometheus
    这可能是最为详细的Docker入门吐血总结
    用不用lambda,这是一个问题
    es上的的Watcher示例
    Elasticsearch6.5.2 X-pack破解及安装教程
    oauth2.0通过JdbcClientDetailsService从数据库读取相应的配置
    Apache Beam实战指南 | 手把手教你玩转大数据存储HdfsIO
  • 原文地址:https://www.cnblogs.com/654321cc/p/8269158.html
Copyright © 2011-2022 走看看