Driver下载地址
Driver 的常用 API
基本
from selenium import webdriver
from selenium.webdriver.common.by import By

# Name (or path) of the chromedriver binary handed to the Chrome driver.
executable_path = "chromedriver"
# NOTE(review): the executable_path keyword is deprecated in Selenium 4 in
# favour of a Service object; it is kept here so the snippet still runs on
# Selenium 3.
driver = webdriver.Chrome(executable_path=executable_path)

# Print every open window/tab handle (window_handles lists windows, not frames)
for window in driver.window_handles:
    print(window)

# Switch into the frame at index 1
driver.switch_to.frame(1)

# Go forward in the browser history
driver.forward()

# Go back in the browser history
driver.back()

# Type text into an input box
driver.find_element(By.ID, "kw").send_keys("new")

# Look up an element on the page
element = driver.find_element(By.ID, "element")

# Read the text contained in the element
print(element.text)

# Read an attribute of the element
element.get_attribute('id')

# Set the value of an <input> directly through JavaScript; arguments[0] is the
# element passed as the extra argument to execute_script.
# NOTE(review): these look like real credentials stored in plain text --
# consider loading them from the environment or a config file instead.
driver.execute_script("arguments[0].value = '17037458040';", driver.find_element(By.ID, "nameNormal"))
driver.execute_script("arguments[0].value = 'qcc203010';", driver.find_element(By.ID, "pwdNormal"))
Select
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

# Name (or path) of the chromedriver binary handed to the Chrome driver.
executable_path = "chromedriver"
driver = webdriver.Chrome(executable_path=executable_path)

# Wrap a <select> drop-down element in the Select helper
select = Select(driver.find_element(By.ID, "select"))

# Select by option index; the index is 0-based, so 1 picks the second option
select.select_by_index(1)

# Select the option whose value attribute is "new"
select.select_by_value("new")

# Select the option whose visible text is "new"
select.select_by_visible_text("new")
Cookie
# Add a cookie to the current browser session
driver.add_cookie({"name": "name", "value": "value", "path": "/"})

# Print every cookie currently held by the session
for cookie in driver.get_cookies():
    print(cookie)
页面等待
显式等待
显式等待:指定某个条件并设置最长等待时间;如果超过这个时间条件仍未满足(例如仍未找到该元素),便会抛出 TimeoutException 异常。
from time import sleep

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait

try:
    # The form is loaded via AJAX; the <select> whose id contains "yui_3_16"
    # is used as the marker that loading has finished. Wait up to 10 seconds
    # for it to be present in the DOM.
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.XPATH, '//select[contains(@id,"yui_3_16")]')
        )
    )

    # Wrap the province drop-down in Select and choose by visible text
    s1 = Select(driver.find_element(By.NAME, 'province'))
    s1.select_by_visible_text("山西")
    # Fixed 5-second pause; presumably gives the dependent city list time to
    # refresh -- TODO confirm, an explicit wait would be more reliable.
    sleep(5)

    # Wrap the city drop-down in Select and choose by visible text
    s2 = Select(driver.find_element(By.NAME, 'city'))
    s2.select_by_visible_text("大同")
    sleep(5)  # seconds

    # The class-name locator accepts a single class only, so the compound
    # value 'btn btn-mini' is expressed as a CSS selector matching both classes.
    driver.find_element(By.CSS_SELECTOR, '.btn.btn-mini').submit()
finally:
    # Always report completion and close the browser, even when a step fails.
    print('end')
    driver.quit()
隐式等待
隐式等待的目的是让 WebDriver 在查找某个或某类元素时,预留一定的时间反复进行检查。
在这段时间内,如果找到元素就立即返回;否则会一直等到超过设置的时间,然后抛出 NoSuchElementException 异常,告知没有找到。
from selenium.webdriver.common.by import By

# Allow element lookups up to 30 seconds to succeed before they fail
driver.implicitly_wait(30)
driver.get("https://www.google.co.in/")
# This lookup is covered by the implicit wait configured above
driver.find_element(By.ID, "lst-ib")
设置User-Agent和Proxy
# Options object that collects the Chrome command-line switches below;
# it must exist before add_argument() can be called on it.
option = webdriver.ChromeOptions()
# Set the User-Agent
option.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36')
# Set the proxy server
option.add_argument("--proxy-server=http://localhost:8001")
# Start Chrome with the chromedriver binary and the options configured above
driver = webdriver.Chrome(r"chromedriver.exe", options=option)
设置防止JS检测
# A fairly practical way to avoid being detected as an automated browser:
# redefine window.navigator.webdriver so that reading it yields undefined.
hide_webdriver_js = """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
# Register the script through the DevTools protocol so that it is evaluated
# on each new document.
driver.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {"source": hide_webdriver_js},
)