Selenium模块
它是一个自动化测试工具,可以支持多个浏览器,在爬虫中主要用来解决JavaScript渲染的问题。
安装:pip3 install selenium
Selenium用法介绍
1、基本用法
# Basic usage: search Baidu for "Python", wait for the results, dump page state.
from selenium import webdriver  # webdriver provides the browser driver objects
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

browser = webdriver.Chrome()
try:
    browser.get('http://www.baidu.com')
    search_box = browser.find_element_by_id('kw')
    search_box.send_keys('Python')
    search_box.send_keys(Keys.ENTER)
    # Wait (timeout 10) until the element with id "content_left" is present.
    results_present = EC.presence_of_element_located((By.ID, 'content_left'))
    WebDriverWait(browser, 10).until(results_present)
    print(browser.current_url)
    print(browser.get_cookies())
    print(browser.page_source)
finally:
    # Runs whether or not the steps above raised.
    browser.close()
2、声明浏览器对象
# Declaring a browser object: one constructor per supported browser.
# Each assignment rebinds `browser`; in practice keep only the line you need.
from selenium import webdriver

browser = webdriver.Chrome()   # Google Chrome
browser = webdriver.Firefox()  # Mozilla Firefox
browser = webdriver.Edge()     # Microsoft Edge
browser = webdriver.Safari()   # Apple Safari
3、访问页面
# Visiting a page: load the URL, print the page source, then close the window.
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('http://www.baidu.com')
html = browser.page_source
print(html)
browser.close()
4、查找元素
# Finding a single element: locate the same node (id "q") in two different ways.
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
input_first = browser.find_element_by_id('q')
# Use the singular find_element_* helper so this is one WebElement, directly
# comparable with input_first; the plural find_elements_* form returns a list.
input_second = browser.find_element_by_css_selector('#q')
print(input_first)
print(input_second)
browser.close()
- browser.find_element_by_id()
- browser.find_element_by_class_name()
- browser.find_element_by_name()
- browser.find_element_by_link_text()
- browser.find_element_by_xpath()
- browser.find_element_by_partial_link_text()
- browser.find_element_by_tag_name()
第二种查找元素的方法,通用的方式(注意:Selenium 4.3 起已移除 find_element_by_* 系列方法,新版本只能使用这种写法)
# Generic lookup: pass the locator strategy (By.*) and its value to find_element().
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
locator = (By.ID, 'q')
input_first = browser.find_element(*locator)
print(input_first)
browser.close()
5、多个元素
# Finding several elements at once: find_elements() returns a list of matches.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
# Alternative spelling with the per-strategy helper:
#   browser.find_elements_by_css_selector('.service-bd li')
service_items_locator = (By.CSS_SELECTOR, '.service-bd li')
lis = browser.find_elements(*service_items_locator)
print(lis)
browser.close()
6、元素的交互操作
对获取到的元素调用交互方法
# Interacting with elements: type into the search box, clear it, type again,
# then click the search button.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
# find_element(By.*, value) is the lookup form that works on every Selenium
# release; the find_element_by_* helpers no longer exist in Selenium 4.3+.
search_box = browser.find_element(By.ID, 'q')  # not named `input`: avoids shadowing the builtin
search_box.send_keys('iphone')
time.sleep(2)  # pause between typing the first keyword and clearing it
search_box.clear()
search_box.send_keys('ipad')
button = browser.find_element(By.CLASS_NAME, 'btn-search')
button.click()
更多操作查询http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webelement
7、交互动作:将动作附加到动作链中串行执行
# Action chains: queue up actions, then run them in order with perform().
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
# Switch into the frame named "iframeResult" before looking up the elements.
browser.switch_to.frame('iframeResult')
# drag_and_drop() needs single WebElements, so use the singular find_element();
# the plural find_elements_* form returns lists and breaks the drag.
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
target = browser.find_element(By.CSS_SELECTOR, '#droppable')
actions = ActionChains(browser)
actions.drag_and_drop(source, target)
actions.perform()
8、执行JavaScript
# Executing JavaScript in the page: scroll to the bottom, then show an alert.
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
scripts = (
    'window.scrollTo(0,document.body.scrollHeight)',
    "alert('To Bottom')",
)
for script in scripts:
    browser.execute_script(script)
9、获取元素信息
- 获取属性
# Reading an attribute of an element with get_attribute().
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
logo = browser.find_element_by_id('zh-top-link-logo')
print(logo)
class_attr = logo.get_attribute('class')
print(class_attr)
- 获取文本值
# Reading the text of an element via its .text property.
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
element = browser.find_element_by_class_name('zu-top-add-question')
element_text = element.text
print(element_text)
- 获取位置,ID,标签名,大小
# Printing an element's text, id, location, tag name and size, in that order.
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
element = browser.find_element_by_class_name('zu-top-add-question')
for property_name in ('text', 'id', 'location', 'tag_name', 'size'):
    print(getattr(element, property_name))
10、Frame
# Frames: lookups only see the current frame, so switch frames before searching.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
browser.switch_to.frame('iframeResult')  # enter the child frame
# Singular find_element(): '#draggable' is an id selector, so print the one
# matching WebElement rather than a one-item list.
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
print(source)
try:
    # Looked up while still inside the child frame; the handler below covers
    # the case where no "logo" element exists there.
    logo = browser.find_element(By.CLASS_NAME, 'logo')
except NoSuchElementException:
    print('No LOGO')
browser.switch_to.parent_frame()  # switch back to the parent frame
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
print(logo.text)
11、等待
- 隐式等待:设置隐式等待后,如果WebDriver没有在DOM中立即找到元素,会继续等待并重试查找,超过设定时间仍未找到才抛出找不到元素的异常。换句话说,当要查找的元素没有立即出现时,隐式等待会等待一段时间再查找DOM;默认的等待时间是0
# Implicit wait: set once on the driver, before any element lookup.
from selenium import webdriver

browser = webdriver.Chrome()
browser.implicitly_wait(10)  # wait budget applied to subsequent element lookups
browser.get('https://www.zhihu.com/explore')
element = browser.find_element_by_class_name('zu-top-add-question')
print(element.text)
- 显式等待
# Explicit wait: block until a specific condition holds, or time out.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
wait = WebDriverWait(browser, 10)
search_box_present = EC.presence_of_element_located((By.ID, 'q'))
search_button_clickable = EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search'))
search_box = wait.until(search_box_present)
button = wait.until(search_button_clickable)
print(search_box)
print(button)

# Commonly used expected conditions:
#   EC.title_is                          - the title equals the given text
#   EC.title_contains                    - the title contains the given text
#   EC.presence_of_element_located       - element is loaded; takes a locator
#   EC.visibility_of                     - element is visible; takes an element object
#   EC.presence_of_all_elements_located  - all matching elements are loaded
#   EC.element_to_be_clickable           - element can be clicked
#   EC.element_located_to_be_selected    - element is selectable; takes a locator
12、浏览器的前进和后退
# Browser history: visit three pages, go back one step, then forward again.
import time

from selenium import webdriver

browser = webdriver.Chrome()
pages = (
    'https://www.baidu.com',
    'https://www.taobao.com',
    'https://www.python.org',
)
for page in pages:
    browser.get(page)
browser.back()
time.sleep(1)
browser.forward()
browser.close()
13、Cookies
# Cookies: print them, add one, print again, delete all, print once more.
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
print(browser.get_cookies())

new_cookie = {'name': "name", 'domain': 'www.zhihu.com', 'value': 'germay'}
browser.add_cookie(new_cookie)
print(browser.get_cookies())

browser.delete_all_cookies()
print(browser.get_cookies())
14、选项卡管理
# Tab management: open a second tab, load a page in it, then return to the first.
from selenium import webdriver
import time

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
browser.execute_script('window.open()')  # open a new tab
print(browser.window_handles)  # handles of all open tabs

second_tab = browser.window_handles[1]
browser.switch_to.window(second_tab)  # switch to the new tab
browser.get('https://www.taobao.com')
time.sleep(1)

first_tab = browser.window_handles[0]
browser.switch_to.window(first_tab)  # switch back to the original tab
browser.get('https://www.baidu.com')
15、异常处理
# Exception handling: catch a page-load timeout and a failed element lookup.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException

browser = webdriver.Chrome()

# Step 1: loading the page; only TimeoutException is handled here.
try:
    browser.get('https://www.taobao.com')
except TimeoutException:
    print('Time out')

# Step 2: look up an element by id; the window is closed afterwards either way.
element_id = 'hello'
try:
    browser.find_element_by_id(element_id)
except NoSuchElementException:
    print('NO element')
finally:
    browser.close()
上面只是简单的异常展示,更多异常请参考http://selenium-python.readthedocs.io/api.html#module-selenium.common.exceptions