zoukankan      html  css  js  c++  java
  • 爬虫 selenium 基础操作

    Driver下载地址

    chromedriver

    selenium

    Driver的常用的API

    基本

    # Basic WebDriver calls, shown one after another as an API tour.
    from selenium import webdriver
    # Name/path of the chromedriver executable passed to webdriver.Chrome.
    executable_path = "chromedriver"
    driver = webdriver.Chrome(executable_path=executable_path)
    # Print every window handle the driver reports (these are windows, not frames)
    for window in driver.window_handles:
        print(window)
    # Switch into a frame, referenced here by the integer 1
    driver.switch_to.frame(1)
    # Go forward in the browser history
    driver.forward()
    # Go back in the browser history
    driver.back()
    # Type the text "new" into the element whose id is "kw"
    driver.find_element_by_id("kw").send_keys("new")
    # Look up a page element by its id
    element = driver.find_element_by_id("element")
    # Print the text contained in the element
    print(element.text)
    # Read an attribute of the element (the returned value is discarded here)
    element.get_attribute('id')
    
    # Set the value of input elements directly through JavaScript;
    # arguments[0] is the element passed as the second argument.
    # NOTE(review): the literals below look like a real account/password pair
    # (ids "nameNormal"/"pwdNormal") — confirm they are placeholders.
    driver.execute_script("arguments[0].value = '17037458040';", driver.find_element_by_id("nameNormal"))
    driver.execute_script("arguments[0].value = 'qcc203010';", driver.find_element_by_id("pwdNormal"))
    
    

    Select

    # Demonstrates the Select helper for drop-down boxes, then cookie access.
    from selenium.webdriver.support.ui import Select
    from selenium import webdriver
    executable_path = "chromedriver"
    driver = webdriver.Chrome(executable_path=executable_path)
    # Wrap the element with id "select" in the Select helper for drop-down boxes
    select = Select(driver.find_element_by_id("select"))
    # Select the option at index 1.
    # NOTE(review): the original note said "the first option"; if indexes are
    # zero-based this is the second option — confirm which was intended.
    select.select_by_index(1)
    # Select the option whose value is "new"
    select.select_by_value("new")
    # Select the option whose visible text is "new"
    select.select_by_visible_text("new")
    
    # Add a cookie with the given name, value and path
    driver.add_cookie({"name": "name", "value": "value", "path": "/"})
    # Print every cookie the driver returns
    for cookie in driver.get_cookies():
        print(cookie)
    

    页面等待

    显式等待

    显式等待:指定某个条件并设置最长等待时间;如果超出这个时间条件仍未满足(例如仍然没有找到该元素),便会抛出超时异常(TimeoutException)。

    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import Select
    from time import sleep
    
    # Explicit-wait flow: wait for the form to finish loading, pick a province
    # and a city from two drop-downs, submit the form, and always quit the driver.
    try:
        # Marker that the form's AJAX load has finished: a <select> whose id
        # contains "yui_3_16" is present. Waits up to 10 seconds, otherwise raises.
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//select[contains(@id,"yui_3_16")]')))
        # Wrap the province drop-down in the Select helper
        s1 = Select(driver.find_element_by_name('province'))
        s1.select_by_visible_text("山西")
        # Fixed pause of 5 seconds before touching the city drop-down
        sleep(5)
        # Wrap the city drop-down in the Select helper
        s2 = Select(driver.find_element_by_name('city'))
        s2.select_by_visible_text("大同")
        sleep(5)  # seconds
        # The class-name locator accepts a single class only; 'btn btn-mini' is a
        # compound class name and is rejected, so match both classes with a
        # CSS selector instead.
        driver.find_element_by_css_selector('.btn.btn-mini').submit()
    finally:
        # Runs whether or not the steps above succeeded, so the browser is
        # always closed.
        print('end')
        driver.quit()
    

    隐式等待

    隐式等待的目的是让 WebDriver 在查找某个或某类元素时留出一定的时间反复检查。
    在这段时间内,一旦找到就立即返回;否则等到超过设置的时间后,再报告没有找到该元素。

    # Set the driver's implicit wait to 30, so element lookups keep retrying
    # for up to that long before reporting that nothing was found.
    driver.implicitly_wait(30)
    # Load the page to search in
    driver.get("https://www.google.co.in/")
    # Look up the element with id "lst-ib" (the result is discarded here)
    driver.find_element_by_id("lst-ib")
    

    设置User-Agent和Proxy

    # Create the Chrome options object first; the snippet used `option`
    # without ever defining it, which fails with NameError.
    option = webdriver.ChromeOptions()
    # Set the User-Agent string the browser sends
    option.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36')
    # Route the browser's traffic through a proxy server
    option.add_argument("--proxy-server=http://localhost:8001")
    # Start Chrome with the options applied
    driver = webdriver.Chrome(r"chromedriver.exe", options=option)
    

    设置防止JS检测

    # A fairly practical way to avoid being detected as an automated browser:
    # make window.navigator.webdriver report undefined.
    # The JavaScript below is registered through the CDP command
    # Page.addScriptToEvaluateOnNewDocument; it redefines the getter of
    # navigator.webdriver to return undefined.
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
      "source": """
        Object.defineProperty(navigator, 'webdriver', {
          get: () => undefined
        })
      """
    })
    
  • 相关阅读:
    Educational Codeforces Round 86 (Rated for Div. 2) D. Multiple Testcases
    Educational Codeforces Round 86 (Rated for Div. 2) C. Yet Another Counting Problem
    HDU
    HDU
    HDU
    HDU
    Good Bye 2019 C. Make Good (异或的使用)
    Educational Codeforces Round 78 (Rated for Div. 2) C. Berry Jam
    codeforces 909C. Python Indentation
    codeforces1054 C. Candies Distribution
  • 原文地址:https://www.cnblogs.com/iFanLiwei/p/12853187.html
Copyright © 2011-2022 走看看