selenium是一个浏览器驱动模块,支持多种浏览器,主要用于自动化测试。
在爬虫中,它主要用来解决页面内容由JavaScript动态渲染、直接请求拿不到完整数据的问题。
效果示例:
# End-to-end demo: search Baidu for "Python", wait for the result list to be
# present, then print the resulting URL, cookies and page source.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

browser = webdriver.Chrome()
try:
    browser.get("http://www.baidu.com")
    # Locate the search box by its id. The generic find_element(By.ID, ...)
    # form is used because the find_element_by_* shortcuts were removed in
    # Selenium 4.3.
    search_box = browser.find_element(By.ID, "kw")
    search_box.send_keys("Python")  # type the query into the search box
    # Pressing ENTER submits the query; it has the same effect as locating
    # the search button and clicking it.
    search_box.send_keys(Keys.ENTER)
    wait = WebDriverWait(browser, 10)  # explicit wait, 10 second timeout
    wait.until(EC.presence_of_element_located((By.ID, "content_left")))
    print(browser.current_url)  # URL of the current page
    print(browser.get_cookies())
    print(browser.page_source)  # HTML source of the current page
finally:
    # quit() ends the whole WebDriver session (all windows plus the driver
    # process); close() would only close the current window.
    browser.quit()
查找元素
单个元素:使用find_element查找;如果想查找多个元素,只要在element后面加个s(即find_elements)就可以了,此时返回的是元素列表。
from selenium import webdriver
from selenium.webdriver.common.by import By

# Example 1: locate a single element by its id attribute.
# This example originally used the `find_element_by_id` shortcut. The
# `find_element_by_*` shortcuts were removed in Selenium 4.3, so the lookup
# now uses the generic `find_element(By.<STRATEGY>, value)` form, which works
# on both older and current releases.
browser = webdriver.Chrome()
try:
    browser.get("https://www.taobao.com")
    search_box = browser.find_element(By.ID, "q")
    print(search_box)
finally:
    # Always end the session, even when the lookup raises.
    browser.quit()

# Legacy shortcut                    -> locator strategy for find_element()
# find_element_by_name               -> By.NAME
# find_element_by_xpath              -> By.XPATH
# find_element_by_link_text          -> By.LINK_TEXT
# find_element_by_partial_link_text  -> By.PARTIAL_LINK_TEXT
# find_element_by_tag_name           -> By.TAG_NAME
# find_element_by_class_name         -> By.CLASS_NAME
# find_element_by_css_selector       -> By.CSS_SELECTOR

# Example 2: the generic method, passing the locator strategy explicitly.
browser = webdriver.Chrome()
try:
    browser.get("https://www.taobao.com")
    search_box = browser.find_element(By.ID, "q")
    print(search_box)
finally:
    browser.quit()
元素交互操作:
对获取的元素调用交互方法
# Element interaction demo: type into Taobao's search box, clear it, type a
# different keyword, then click the search button.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get("https://www.taobao.com")

# find_element(By.*, ...) replaces the find_element_by_* shortcuts, which
# were removed in Selenium 4.3.
search_box = browser.find_element(By.ID, "q")
search_box.send_keys("iPhone")
time.sleep(1)  # brief pause (presumably so the first keyword stays visible)
search_box.clear()
search_box.send_keys("iPad")

search_button = browser.find_element(By.CLASS_NAME, "btn-search")
search_button.click()
# The browser is intentionally left open, as in the original example.
交互动作:
将动作附加到动作链中串行执行
# Action chain demo: drag the #draggable element onto #droppable inside the
# result iframe of the runoob jQuery UI "droppable" page.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = "http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable"
browser.get(url)

# Both elements are looked up after switching into the "iframeResult" frame.
browser.switch_to.frame("iframeResult")

# find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
# which was removed in Selenium 4.3.
source = browser.find_element(By.CSS_SELECTOR, "#draggable")
target = browser.find_element(By.CSS_SELECTOR, "#droppable")

# Actions are queued on the chain and only executed, in order, by perform().
actions = ActionChains(browser)
actions.drag_and_drop(source, target)
actions.perform()
执行JavaScript
# Execute JavaScript demo: scroll the Zhihu explore page to the bottom, then
# raise a browser alert.
from selenium import webdriver

SCROLL_TO_BOTTOM = "window.scrollTo(0,document.body.scrollHeight)"
SHOW_ALERT = 'alert("To Bottom")'

browser = webdriver.Chrome()
browser.get("https://www.zhihu.com/explore")

# Run the snippets in order: scroll first, then show the alert.
for script in (SCROLL_TO_BOTTOM, SHOW_ALERT):
    browser.execute_script(script)
获取元素信息
获取属性
# Attribute demo: locate the logo element on the Zhihu explore page and print
# the element object followed by the value of its "class" attribute.
from selenium import webdriver
from selenium.webdriver import ActionChains  # unused here; kept from the original snippet
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = "https://www.zhihu.com/explore"
browser.get(url)

# find_element(By.ID, ...) replaces find_element_by_id, which was removed in
# Selenium 4.3.
logo = browser.find_element(By.ID, "zh-top-link-logo")
print(logo)
print(logo.get_attribute("class"))
获取文本值(通过元素的text属性),以及元素的ID、位置、标签名和大小
# Element info demo: print the id, location, tag name and size of the
# "add question" element on the Zhihu explore page.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = "https://www.zhihu.com/explore"
browser.get(url)

# find_element(By.CLASS_NAME, ...) replaces find_element_by_class_name, which
# was removed in Selenium 4.3. The result is no longer bound to `input`, so
# the builtin of that name is not shadowed.
element = browser.find_element(By.CLASS_NAME, "zu-top-add-question")
print(element.id)  # NOTE(review): Selenium's own element reference, not the HTML id attribute - confirm
print(element.location)
print(element.tag_name)
print(element.size)
Frame
# Frame demo: an element lookup only searches the frame the driver is
# currently switched into. The "logo" lookup is attempted inside the iframe
# first (reporting "NO LOGO" when it is not found there) and then repeated
# after switching back to the parent frame.
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = "http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable"
browser.get(url)

browser.switch_to.frame("iframeResult")
# find_element(By.*, ...) replaces the find_element_by_* shortcuts, which
# were removed in Selenium 4.3.
source = browser.find_element(By.CSS_SELECTOR, "#draggable")
print(source)

try:
    logo = browser.find_element(By.CLASS_NAME, "logo")
except NoSuchElementException:
    print("NO LOGO")

# Back in the parent frame, repeat the same lookup.
browser.switch_to.parent_frame()
logo = browser.find_element(By.CLASS_NAME, "logo")
print(logo)
print(logo.text)
等待
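隐式等待:调用browser.implicitly_wait(秒数)后,查找元素时如果元素暂时还没有出现,WebDriver会在该时限内继续等待,超时后才抛出异常;显式等待(WebDriverWait配合expected_conditions)的用法见文首的效果示例。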
选项卡管理
# Tab management demo: open a second tab with JavaScript, load a page in it,
# then switch back to the first tab and load another page there.
import time

from selenium import webdriver

browser = webdriver.Chrome()
browser.get("https://www.baidu.com")

# window.open() creates a new tab; its handle then shows up in window_handles.
browser.execute_script("window.open()")
print(browser.window_handles)

new_tab = browser.window_handles[1]
browser.switch_to.window(new_tab)
browser.get("https://www.taobao.com")
time.sleep(1)

first_tab = browser.window_handles[0]
browser.switch_to.window(first_tab)
browser.get("https://python.org")
参考:https://blog.csdn.net/ccggaag/article/details/76652274