zoukankan      html  css  js  c++  java
  • 爬虫基础 selenium 基础

    # -*- coding:utf8 -*-
    # 工程路径:selenium驱动浏览器详解.py
    # 工程日期:10/6/2019
    # 工程目标:selenium 自动化测试库
    
    #%% 用来驱动浏览器模拟人的操作
    # 主要用于解析JS渲染的页面
    
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import  WebDriverWait
    
    # Drive Chrome through a Baidu search and print the resulting URL and cookies.
    browser = webdriver.Chrome()  # Start a Chrome session.
    try:
        browser.get('https://www.baidu.com')  # Load the page.
        # find_element(By.ID, ...) replaces find_element_by_id, which was
        # deprecated in Selenium 4.0 and removed in 4.3.  The element is bound
        # to `search_box` rather than `input` so the builtin is not shadowed.
        search_box = browser.find_element(By.ID, 'kw')
        search_box.send_keys('美女图片')  # Type the search keyword.
        search_box.send_keys(Keys.ENTER)  # Submit with the Enter key.
        # Wait (timeout argument: 10) until an element with id "content_left"
        # is present in the DOM; until() raises TimeoutException otherwise.
        wait = WebDriverWait(browser, 10)
        wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
        print(browser.current_url)  # URL after the search was submitted.
        print(browser.get_cookies())  # Cookies visible to the current page.
    finally:
        print("ok")
        # quit() ends the session and stops the driver process; close() only
        # closes the current window.
        browser.quit()
    
    #%%  选择元素
    # 普通选择
    # css选择
    # xpath选择
    
    # 选取单个元素
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    # Locate the same node (id="q") with three different locator strategies.
    browser = webdriver.Chrome()
    browser.get('http://www.taobao.com')
    # find_element (singular) returns one WebElement.  The original used
    # find_elements_by_id here, which returns a list, and the
    # find_element_by_* helpers were removed in Selenium 4.3.
    find_nom = browser.find_element(By.ID, 'q')  # lookup by id
    find_css = browser.find_element(By.CSS_SELECTOR, '#q')  # lookup by CSS selector
    find_xpath = browser.find_element(By.XPATH, '//*[@id="q"]')  # lookup by XPath
    print(find_css, find_nom, find_xpath)
    """
    * find_element_by_name
    * find_element_by_xpath
    * find_element_by_link_text
    * find_element_by_partial_link_text
    * find_element_by_tag_name
    * find_element_by_class_name
    * find_element_by_css_selector
    """
    
    
    # Generic lookup: pass the locator strategy and its value explicitly,
    # e.g. browser.find_element(By.ID, 'q').
    find_us = browser.find_element(by=By.ID, value='q')
    print(find_us)
    
    
    # Looking up several elements at once: find_elements (plural) returns a list.
    print("----查找多个元素----")
    # find_elements(By.X, ...) replaces the find_elements_by_* helpers, which
    # were deprecated in Selenium 4.0 and removed in 4.3.
    find_more = browser.find_elements(By.ID, 'q')
    find_more_css = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
    # Note the difference between an id locator and a CSS locator, and between
    # the singular find_element and the plural find_elements.
    find_more_nom = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
    print(find_more)
    print(find_more_css)
    print("# 注意普通参数和CSS参数, 以及选择的那个元素和多个元素的复数的区别")
    print(find_more_nom)
    
    """ 多个元素的返回是列表的形式
    * find_elements_by_name
    * find_elements_by_xpath
    * find_elements_by_link_text
    * find_elements_by_partial_link_text
    * find_elements_by_tag_name
    * find_elements_by_class_name
    * find_elements_by_css_selector
    """
    
    #%% 元素的交互操作
    # 对浏览器中的
    # 获取浏览器的文本框,按钮,滑动条,交互 输入文字,对浏览器的元素进行操作模拟人交互
    import time

    from selenium import webdriver
    from selenium.webdriver.common.by import By

    # Type into the Taobao search box, clear it, retype, then click search.
    browser = webdriver.Chrome()  # Start a Chrome session.
    browser.get('https://www.taobao.com')  # Load the page.
    # find_element(By.ID, ...) replaces find_element_by_id (removed in
    # Selenium 4.3); `search_box` avoids shadowing the builtin `input`.
    search_box = browser.find_element(By.ID, 'q')
    search_box.send_keys('iPhone')  # Type the first keyword.
    time.sleep(1)  # Pause so the typed text is visible before it is cleared.
    search_box.clear()  # Remove the text typed so far.
    search_box.send_keys('iPad')  # Type the second keyword.
    button = browser.find_element(By.CLASS_NAME, 'btn-search')  # Element with class "btn-search".
    button.click()  # Simulate a click on it.
    #%% 更多操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webelement
    
    
    
    #%% 交互的动作 将元素的动作加到动作链中串行执行, action chains 动作链
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By

    # Drag one element onto another using an action chain.
    browser = webdriver.Chrome()
    url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
    browser.get(url)
    browser.switch_to.frame('iframeResult')  # Switch into the "iframeResult" frame.
    # find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
    # which was removed in Selenium 4.3.
    source = browser.find_element(By.CSS_SELECTOR, '#draggable')  # Element to drag.
    target = browser.find_element(By.CSS_SELECTOR, '#droppable')  # Element to drop onto.
    actions = ActionChains(browser)  # Create the action chain.
    actions.drag_and_drop(source, target)  # Queue the drag; nothing runs yet.
    actions.perform()  # Execute every queued action in order.
    
    #其他交互操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.action_chains
    """
    click_and_hold(on_element=None)
        Holds down the left mouse button on an element.
        Args:
        on_element: The element to mouse down. If None, clicks on current mouse position.
    
    context_click(on_element=None)
        Performs a context-click (right click) on an element.
        Args:
        on_element: The element to context-click. If None, clicks on current mouse position.
    
    double_click(on_element=None)
        Double-clicks an element.
        Args:
        on_element: The element to double-click. If None, clicks on current mouse position.
    
    drag_and_drop(source, target)
        Holds down the left mouse button on the source element,
        then moves to the target element and releases the mouse button.
        Args:
        source: The element to mouse down.
        target: The element to mouse up.
    
    drag_and_drop_by_offset(source, xoffset, yoffset)
        Holds down the left mouse button on the source element,
        then moves to the target offset and releases the mouse button.
        Args:
        source: The element to mouse down.
        xoffset: X offset to move to.
        yoffset: Y offset to move to.
    
    key_down(value, element=None)
        Sends a key press only, without releasing it.
        Should only be used with modifier keys (Control, Alt and Shift).
        Args:
        value: The modifier key to send. Values are defined in Keys class.
        element: The element to send keys. If None, sends a key to current focused element.
        Example, pressing ctrl+c:
            ActionChains(driver).key_down(Keys.CONTROL).send_keys('c').key_up(Keys.CONTROL).perform()
    
    key_up(value, element=None)
        Releases a modifier key.
        Args:
        value: The modifier key to send. Values are defined in Keys class.
        element: The element to send keys. If None, sends a key to current focused element.
        Example, pressing ctrl+c:
            ActionChains(driver).key_down(Keys.CONTROL).send_keys('c').key_up(Keys.CONTROL).perform()
    
    move_by_offset(xoffset, yoffset)
        Moving the mouse to an offset from current mouse position.
        Args:
        xoffset: X offset to move to, as a positive or negative integer.
        yoffset: Y offset to move to, as a positive or negative integer.
    
    move_to_element(to_element)
        Moving the mouse to the middle of an element.
        Args:
        to_element: The WebElement to move to.
    
    move_to_element_with_offset(to_element, xoffset, yoffset)
        Move the mouse by an offset of the specified element.
        Offsets are relative to the top-left corner of the element.
        Args:
        to_element: The WebElement to move to.
        xoffset: X offset to move to.
        yoffset: Y offset to move to.
    
    pause(seconds)
        Pause all inputs for the specified duration in seconds
    
    perform()
        Performs all stored actions.
    
    release(on_element=None)
        Releasing a held mouse button on an element.
        Args:
        on_element: The element to mouse up. If None, releases on current mouse position.
    
    reset_actions()
        Clears actions that are already stored locally and on the remote end
    
    send_keys(*keys_to_send)
        Sends keys to current focused element.
        Args:
        keys_to_send: The keys to send. Modifier keys constants can be found in the ‘Keys’ class.
    
    send_keys_to_element(element, *keys_to_send)
        Sends keys to an element.
        Args:
        element: The element to send keys.
        keys_to_send: The keys to send. Modifier keys constants can be found in the ‘Keys’ class
    """
    
    #%% 执行javascript
    # 通过execute_script 来执行javascript交互
    # 万能方法
    from selenium import webdriver

    # Run JavaScript in the page: first scroll to the bottom, then raise an alert.
    browser = webdriver.Chrome()
    browser.get('https://www.zhihu.com/explore')
    for script in (
        'window.scrollTo(0, document.body.scrollHeight)',
        'alert("To Bottom")',
    ):
        browser.execute_script(script)
    
    
    #%%  获取页面的节点的属性信息, 文本信息
    
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    # Print the visible text of two nodes located by class name.
    browser = webdriver.Chrome()
    browser.get('https://www.zhihu.com/explore')
    # find_element(By.CLASS_NAME, ...) replaces find_element_by_class_name
    # (removed in Selenium 4.3); the new names avoid shadowing the builtin `input`.
    question_node = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
    nav_node = browser.find_element(By.CLASS_NAME, 'zu-top-nav-link')
    print(question_node.text)
    print(nav_node.text)
    
    #%% 翻页操作
    import time
    from selenium import webdriver

    # Visit three pages, then step back and forward through the history.
    browser = webdriver.Chrome()
    try:
        browser.get('https://www.baidu.com/')
        browser.get('https://www.taobao.com/')
        browser.get('https://www.python.org/')
        browser.back()  # Go back one entry in the history.
        time.sleep(1)  # Pause so the intermediate page is visible.
        browser.forward()  # Go forward again.
    finally:
        # Always release the browser, even if a navigation step raises.
        # quit() ends the session and stops the driver process; close() only
        # closes the current window.
        browser.quit()
    
    #%% cookies 使用和管理
    from selenium import webdriver

    # Show the cookies before adding one, after adding one, and after deleting all.
    browser = webdriver.Chrome()
    browser.get('https://www.zhihu.com/explore')
    print(browser.get_cookies())
    extra_cookie = dict(name='name', domain='www.zhihu.com', value='germey')
    browser.add_cookie(extra_cookie)
    print(browser.get_cookies())
    browser.delete_all_cookies()
    print(browser.get_cookies())
    
    #%% 异常的处理
    # 查看官方文档详细的异常的情况
    
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException, NoSuchElementException
    from selenium.webdriver.common.by import By

    # Demonstrate catching Selenium's TimeoutException and NoSuchElementException.
    browser = webdriver.Chrome()
    try:
        try:
            browser.get('https://www.baidu.com')
        except TimeoutException:
            print('Time Out')
        try:
            # find_element(By.ID, ...) replaces find_element_by_id.  On
            # Selenium >= 4.3 the old helper no longer exists, so calling it
            # raises AttributeError instead of the NoSuchElementException
            # this cell is meant to show.
            browser.find_element(By.ID, 'hello')
        except NoSuchElementException:
            print('No Element')
    finally:
        # The outer finally covers both steps, so the browser is released
        # even if get() fails with something other than a timeout.  quit()
        # also stops the driver process, which close() does not.
        browser.quit()
    # 官方文档:http://selenium-python.readthedocs.io/api.html#module-selenium.common.exceptions
    
    
    
    
    
    
    
    
    
  • 相关阅读:
    nginx-1.8.1的安装
    ElasticSearch 在3节点集群的启动
    The type java.lang.CharSequence cannot be resolved. It is indirectly referenced from required .class files
    sqoop导入导出对mysql再带数据库test能跑通用户自己建立的数据库则不行
    LeetCode 501. Find Mode in Binary Search Tree (找到二叉搜索树的众数)
    LeetCode 437. Path Sum III (路径之和之三)
    LeetCode 404. Sum of Left Leaves (左子叶之和)
    LeetCode 257. Binary Tree Paths (二叉树路径)
    LeetCode Questions List (LeetCode 问题列表)- Java Solutions
    LeetCode 561. Array Partition I (数组分隔之一)
  • 原文地址:https://www.cnblogs.com/binyang/p/10998419.html
Copyright © 2011-2022 走看看