zoukankan      html  css  js  c++  java
  • Selenium的使用

    • 基本使用

      from selenium import webdriver
      from selenium.webdriver.common.by import By
      from selenium.webdriver.common.keys import Keys
      from selenium.webdriver.support import expected_conditions
      from selenium.webdriver.support.wait import WebDriverWait
      
      # Selenium 的 webdriver 源码依据一个或多个贡献者许可协议授权给软件自由保护协会(SFC)。
      browser = webdriver.Chrome()
      try:
          browser.get('https://www.baidu.com')                        # get() 在当前浏览器会话中加载网页。
          input = browser.find_element_by_id('kw')                    # find_element_by_id() 按ID查找元素
          input.send_keys('Python')                                   # send_keys() 模拟键入元素。
          input.send_keys(Keys.ENTER)                                 # Keys 是一组特殊按键代码。ENTER = '\ue007'
          wait = WebDriverWait(browser, 10)                           # WebDriverWait()构造函数,获取WebDriver实例、超时(以秒为单位)
          wait.until(                                                 # until()调用随驱动程序提供的方法作为参数,直到返回值不为假。
              expected_conditions.presence_of_element_located(
                  # presence_of_element_located()检查页的DOM中是否存在元素的期望值。
                  # 这不一定意味着元素是可见的。locator-用于查找元素,一旦找到该元素,它将返回WebElement。
                  (By.ID, 'content_left')                             # By 是支持的定位策略集。ID = "id"
              )
          )
          print(
              browser.current_url,                # current_url属性 获取当前页的URL。
              browser.get_cookies(),              # get_cookies()方法 返回一组字典,对应于当前会话中可见的cookie。
              browser.page_source,                # page_source属性 获取当前页的源码
              sep='\n'
          )
      
      finally:
          browser.close()                         # 关闭当前窗口
      View Code
    • 声明浏览器对象

      from selenium import webdriver
      
      # Selenium支持的一部分浏览器
      browser1 = webdriver.Chrome()
      browser2 = webdriver.Firefox()
      browser3 = webdriver.Edge()
      browser4 = webdriver.PhantomJS()
      browser5 = webdriver.Safari()
      
      # 完成浏览器对象的初始化并将其赋值为 browser 对象
      View Code
    • 访问页面

      from selenium import webdriver
      
      browser = webdriver.Chrome()
      browser.get('https://www.taobao.com')               # get()方法请求网页
      print(browser.page_source)                          # page_source属性获取网页源代码
      browser.close()
      View Code
    • 查找节点

      # 拿淘宝网举例
      from selenium import webdriver
      
      browser = webdriver.Chrome()
      browser.get('https://www.taobao.com')
      # 分别利用ID、CSS、XPath三种方式查找节点,以淘宝网源代码中的一个id='q'的节点为例。
      input_first = browser.find_element_by_id('q')
      input_second = browser.find_element_by_css_selector('#q')
      input_third = browser.find_element_by_xpath('//*[@id="q"]')
      print(
          'ID方式查找节点:\t', input_first,
          'CSS选择器方式查找节点:\t', input_second,
          'XPath方式查找节点:\t', input_third,
          sep='\n'
      )
      browser.close()
      
      
      # 输出:
      ID方式查找节点:    
      <selenium.webdriver.remote.webelement.WebElement (session="1ec980e4cd9be81c212a1b2285039dd9", element="0.49282688108570993-1")>
      CSS选择器方式查找节点:    
      <selenium.webdriver.remote.webelement.WebElement (session="1ec980e4cd9be81c212a1b2285039dd9", element="0.49282688108570993-1")>
      XPath方式查找节点:    
      <selenium.webdriver.remote.webelement.WebElement (session="1ec980e4cd9be81c212a1b2285039dd9", element="0.49282688108570993-1")>
      # 输出均为WebElement 类型
      
      
      # 获取单个节点的方法:
      find_element_by_id
      find_element_by_name
      find_element_by_xpath
      find_element_by_link_text
      find_element_by_partial_link_text
      find_element_by_tag_name
      find_element_by_class_name
      find_element_by_css_selector
      获取单个节点
      from selenium import webdriver
      from selenium.webdriver.common.by import By
       
      browser = webdriver.Chrome()
      browser.get('https://www.taobao.com')
      input_first = browser.find_element(By.ID, 'q')
      print(input_first)
      browser.close()
      
      
      # find_element_by_id(id)就等价于find_element(By.ID, id),二者得到的结果完全一致。
      # By是一个支持的定位策略集
      find_element()方法查找单个节点
      from selenium import webdriver
      from selenium.webdriver.common.by import By
      
      browser = webdriver.Chrome()
      browser.get('https://www.taobao.com')
      
      list1 = browser.find_elements_by_css_selector('.service-bd li')
      list2 = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
      
      print(
          'find_elements_by_css_selector()方法:', list1,
          'find_elements()方法:', list2,
          sep='\n'
      )
      browser.close()
      
      
      # 输出:
      find_elements_by_css_selector()方法:
      [<selenium.webdriver.remote.webelement.WebElement (session="4bcb567fe9900ef1ec4336651fc12a1d", element="0.257017382611505-1")>, 
      ......
      <selenium.webdriver.remote.webelement.WebElement (session="4bcb567fe9900ef1ec4336651fc12a1d", element="0.257017382611505-16")>]
      find_elements()方法:
      [<selenium.webdriver.remote.webelement.WebElement (session="4bcb567fe9900ef1ec4336651fc12a1d", element="0.257017382611505-1")>, 
      ......
       <selenium.webdriver.remote.webelement.WebElement (session="4bcb567fe9900ef1ec4336651fc12a1d", element="0.257017382611505-16")>]
      # 两种方法输出结果一样
      获取多个节点
    • 节点交互

      from selenium import webdriver
      import time
      
      browser = webdriver.Chrome()
      browser.get('https://www.taobao.com')
      
      input_ = browser.find_element_by_id('q')
      input_.send_keys('iPhone')                              # 模拟键盘输入iPhone
      time.sleep(1)                                           # time.sleep() 将执行延迟给定秒数。
      input_.clear()                                          # 清除输入
      input_.send_keys('iPad')                                # 模拟键盘输入iPad
      button = browser.find_element_by_class_name('tb-bg')            # 找到一个class属性值为tb-bg的节点
      button.click()                                                  # 模拟鼠标点击
      View Code
    • Selenium驱动浏览器来执行一些操作

    • 动作链

      from selenium import webdriver
      from selenium.webdriver import ActionChains
      
      browser = webdriver.Chrome()
      browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
      browser.switch_to.frame('iframeResult')                             # 切换到框架'iframeResult'
      
      source = browser.find_element_by_css_selector('#draggable')         # 要拖拽的节点
      target = browser.find_element_by_css_selector('#droppable')         # 拖拽到的目标节点
      
      actions = ActionChains(browser)                     # ActionChains() 创建动作链
      actions.drag_and_drop(source, target)               # drag_and_drop()按住源元素上的鼠标左键,然后移动到目标元素并释放鼠标按钮。
      actions.perform()                                   # perform() 执行所有存储的操作。
      View Code

      拖拽前
      拖拽后

    • 执行JavaScript

      # 利用 execute_script()方法将进度条下拉到最底部;如需随后弹出 alert提示框,可再调用 execute_script('alert("To Bottom")')。
      from selenium import webdriver
      
      browser = webdriver.Chrome()
      browser.get('http://www.zhihu.com/explore')
      browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
      View Code
    • 获取节点信息

      • 获取属性----get_attribute()方法

        from selenium import webdriver
        
        browser = webdriver.Chrome()
        browser.get('http://www.zhihu.com/explore')
        
        logo = browser.find_element_by_id('zh-top-inner')
        print(
            logo,
            logo.get_attribute('class'),
            sep='\n'
        )
        browser.close()
        
        
        # 输出:
        <selenium.webdriver.remote.webelement.WebElement (session="7f325513a2f34aaa95612698d78817e6", element="0.5056570582847388-1")>
        zg-wrap modal-shifting clearfix
        View Code
      • 获取文本值----text属性

        # 相当于 Beautiful Soup 的 get_text()方法、 pyquery 的 text()方法
        from selenium import webdriver
        
        browser = webdriver.Chrome()
        browser.get('http://www.zhihu.com/explore')
        
        zhihu = browser.find_element_by_class_name('zu-top-link-logo')
        print(zhihu.text)
        browser.close()
        
        
        # 输出:
        知乎
        View Code
      • 获取id、位置、标签名和大小----id、location、tag_name、size属性

        from selenium import webdriver
        
        browser = webdriver.Chrome()
        browser.get('http://www.zhihu.com/explore')
        
        input_ = browser.find_element_by_class_name('zu-top-add-question')
        print(
            input_.id,
            input_.location,
            input_.tag_name,
            input_.size,
            sep='\n'
        )
        browser.close()
        
        
        # 输出:
        0.9009302916784063-1
        {'x': 849, 'y': 7}
        button
        {'height': 32, 'width': 66}
        View Code
    • 切换Frame

      from selenium import webdriver
      from selenium.common.exceptions import NoSuchElementException
      browser = webdriver.Chrome()
      url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
      browser.get(url)
      
      browser.switch_to.frame('iframeResult')                 # switch_to.frame() 切换Frame
      try:
          logo = browser.find_element_by_class_name('logo')
      except NoSuchElementException:                          # NoSuchElementException 找不到元素时引发
          print('NO LOGO')
      browser.switch_to.parent_frame()
      logo_ = browser.find_element_by_class_name('logo')
      print(logo_)
      print(logo_.text)
      
      
      # 输出:
      NO LOGO
      <selenium.webdriver.remote.webelement.WebElement (session="d24c9d62b8c5882adec32f3ed55b5d7b", element="0.9706135395535092-2")>
      RUNOOB.COM
      View Code
    • 延时等待

      • 隐式等待

        from selenium import webdriver
        
        browser = webdriver.Chrome()
        browser.implicitly_wait(10)  # implicitly_wait()隐式等待(默认等待时间为0秒):查找节点时若没有立即找到,会在设定的时间内继续等待一会儿再找,超时后才抛出异常;容易受到页面加载时间的影响
        browser.get('https://www.zhihu.com/explore')
        input = browser.find_element_by_class_name('zu-top-add-question')
        print(input)
        View Code
      • 显式等待

        from selenium import webdriver
        from selenium.webdriver.common.by import By
        from selenium.webdriver.support.ui import WebDriverWait
        from selenium.webdriver.support import expected_conditions as EC
        
        browser = webdriver.Chrome()
        browser.get('https://www.taobao.com/')
        wait = WebDriverWait(browser, 10)  # WebDriverWait() 显式等待,不会受页面的加载时间、网络条件的影响
        input_ = wait.until(EC.presence_of_element_located((By.ID, 'q')))    # until()调用与驱动程序一起提供的方法作为参数,直到返回值不为假。
        button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
        # presence_of_element_located这个条件,代表节点出现的意思
        # element_to_be_clickable这个条件代表可点击的
        print(input_, button, sep='\n')
        
        
        # 输出:
        <selenium.webdriver.remote.webelement.WebElement (session="497bc36663dd6ed41d7c59bd6a51982f", element="0.8346683456577526-1")>
        <selenium.webdriver.remote.webelement.WebElement (session="497bc36663dd6ed41d7c59bd6a51982f", element="0.8346683456577526-2")>
        View Code
        class title_is(object):
            """An expectation for checking the title of a page.
            title is the expected title, which must be an exact match
            returns True if the title matches, false otherwise."""
        
        
        class title_contains(object):
            """ An expectation for checking that the title contains a case-sensitive
            substring. title is the fragment of title expected
            returns True when the title matches, False otherwise
            """
        
        
        class presence_of_element_located(object):
            """ An expectation for checking that an element is present on the DOM
            of a page. This does not necessarily mean that the element is visible.
            locator - used to find the element
            returns the WebElement once it is located
            """
        
        
        class url_contains(object):
            """ An expectation for checking that the current url contains a
            case-sensitive substring.
            url is the fragment of url expected,
            returns True when the url matches, False otherwise
            """
        
        
        class url_matches(object):
            """An expectation for checking the current url.
            pattern is the expected regular-expression pattern; it is searched for
            within the current url, so an exact full match is not required
            returns True if the url matches, false otherwise."""
        
        
        class url_to_be(object):
            """An expectation for checking the current url.
            url is the expected url, which must be an exact match
            returns True if the url matches, false otherwise."""
        
        
        class url_changes(object):
            """An expectation for checking the current url.
            url is the expected url, which must not be an exact match
            returns True if the url is different, false otherwise."""
        
        
        class visibility_of_element_located(object):
            """ An expectation for checking that an element is present on the DOM of a
            page and visible. Visibility means that the element is not only displayed
            but also has a height and width that is greater than 0.
            locator - used to find the element
            returns the WebElement once it is located and visible
            """
        
        
        class visibility_of(object):
            """ An expectation for checking that an element, known to be present on the
            DOM of a page, is visible. Visibility means that the element is not only
            displayed but also has a height and width that is greater than 0.
            element is the WebElement
            returns the (same) WebElement once it is visible
            """
        
        
        class presence_of_all_elements_located(object):
            """ An expectation for checking that there is at least one element present
            on a web page.
            locator is used to find the element
            returns the list of WebElements once they are located
            """
        
        
        class visibility_of_any_elements_located(object):
            """ An expectation for checking that there is at least one element visible
            on a web page.
            locator is used to find the element
            returns the list of WebElements once they are located
            """
        
        
        class visibility_of_all_elements_located(object):
            """ An expectation for checking that all elements are present on the DOM of a
            page and visible. Visibility means that the elements are not only displayed
            but also has a height and width that is greater than 0.
            locator - used to find the elements
            returns the list of WebElements once they are located and visible
            """
        
        
        class text_to_be_present_in_element(object):
            """ An expectation for checking if the given text is present in the
            specified element.
            locator, text
            """
        
        
        class text_to_be_present_in_element_value(object):
            """
            An expectation for checking if the given text is present in the element's
            value attribute.
            locator, text
            """
        
        
        class frame_to_be_available_and_switch_to_it(object):
            """ An expectation for checking whether the given frame is available to
            switch to.  If the frame is available it switches the given driver to the
            specified frame.
            """
        
        
        class invisibility_of_element_located(object):
            """ An Expectation for checking that an element is either invisible or not
            present on the DOM.
        
            locator used to find the element
            """
        
        
        class invisibility_of_element(invisibility_of_element_located):
            """ An Expectation for checking that an element is either invisible or not
            present on the DOM.
        
            element is either a locator (text) or an WebElement
            """
        
        
        class element_to_be_clickable(object):
            """ An Expectation for checking an element is visible and enabled such that
            you can click it."""
        
        
        class staleness_of(object):
            """ Wait until an element is no longer attached to the DOM.
            element is the element to wait for.
            returns False if the element is still attached to the DOM, true otherwise.
            """
        
        
        class element_to_be_selected(object):
            """ An expectation for checking the selection is selected.
            element is WebElement object
            """
        
        
        class element_located_to_be_selected(object):
            """An expectation for the element to be located is selected.
            locator is a tuple of (by, path)"""
        
        
        class element_selection_state_to_be(object):
            """ An expectation for checking if the given element is selected.
            element is WebElement object
            is_selected is a Boolean.
            """
        
        
        class element_located_selection_state_to_be(object):
            """ An expectation to locate an element and check if the selection state
            specified is in that state.
            locator is a tuple of (by, path)
            is_selected is a boolean
            """
        
        
        class number_of_windows_to_be(object):
            """ An expectation for the number of windows to be a certain value."""
        
        
        class new_window_is_opened(object):
            """ An expectation that a new window will be opened and have the number of
            windows handles increase"""
        
        
        class alert_is_present(object):
            """ Expect an alert to be present."""
        
        
        def _find_element(driver, by):
            """Looks up an element. Logs and re-raises ``WebDriverException``
            if thrown."""
        等待条件
    • 前进和后退

      # back()方法,后退到上一个页面。forward()方法,前进到下一个页面
      import time
      from selenium import webdriver
      
      browser = webdriver.Chrome()
      browser.get('https://www.baidu.com/')
      time.sleep(1)
      browser.get('https://www.taobao.com/')
      time.sleep(1)
      browser.get('https://www.zhihu.com/')
      time.sleep(1)
      browser.back()          # 现在位于https://www.zhihu.com/页面,返回上一个页面即为https://www.taobao.com/页面
      time.sleep(1)
      browser.forward()       # 现在位于https://www.taobao.com/页面,跳到下一个页面即为https://www.zhihu.com/页面
      time.sleep(1)
      browser.close()
      View Code
    • Cookies

      from selenium import webdriver
      
      browser = webdriver.Chrome()
      browser.get('https://www.zhihu.com/explore')
      print(browser.get_cookies())        # 获取Cookies
      browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'})          # 添加Cookie
      print(browser.get_cookies())        # 获取Cookies
      browser.delete_all_cookies()        # 删除Cookies
      print(browser.get_cookies())        # 获取Cookies
      browser.close()
      
      
      # 输出:
      [{'domain': '.zhihu.com', 'expiry': 1579115127, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1563347127.1.1.utm......]
      [{'domain': 'www.zhihu.com', 'expiry': 2194067127, 'httpOnly': False, 'name': 'name', 'path': '/', 'secure': True, 'value': 'germey'}, {'domain': '.zhihu.com......]
      []
      View Code
    • 选项卡管理

      import time
      from selenium import webdriver
      
      browser = webdriver.Chrome()
      browser.get('https://www.baidu.com')
      browser.execute_script('window.open()')
      # 调用了execute_script()方法,执行这里传入JavaScript。 window.open() 这个JavaScript语句新开启一个选项卡 。
      
      print(browser.window_handles)
      # 调用 window_handles 属性获取当前开启的所有选项卡,返回的是选项卡的代号列表
      
      browser.switch_to.window(browser.window_handles[1])
      # 调用 switch_to.window() 方法,切换选项卡(旧写法 switch_to_window() 已被弃用)
      
      browser.get('https://www.taobao.com')
      time.sleep(1)
      browser.switch_to.window(browser.window_handles[0])
      browser.get('https://zhihu.com')
      browser.close()                     # 关闭当前所在的选项卡
      
      
      # 输出:
      ['CDwindow-BBF992DA636EC22831C022F29A7F976A', 'CDwindow-37A0508493A023D6BC1393D11D5F4D9F']
      View Code
    • 异常处理

      from selenium import webdriver
      from selenium.common.exceptions import TimeoutException, NoSuchElementException
      
      browser = webdriver.Chrome()
      try:
          browser.get('https://www.baidu.com')
      except TimeoutException:
          print('Time Out')
      try:
          browser.find_element_by_id('hello')
      except NoSuchElementException:
          print('No Element')
      finally:
          browser.close()
      
      # 输出:
      No Element
      View Code
  • 相关阅读:
    spring注解集合
    spring工作原理理解
    Linux下mysql命令 导入 导出sql文件
    List和Set排序的实现
    LeetCode--树
    LeetCode--链表
    LeetCode--字符串
    LeetCode--贪心算法
    LeetCode--数组
    数据库编程基本练习题
  • 原文地址:https://www.cnblogs.com/liyihua/p/11189702.html
Copyright © 2011-2022 走看看