# Selenium walkthrough: driving Chrome and reading text inside an <iframe>.
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

web = Chrome()

# NOTE: lookups use find_element(By.XPATH, ...) rather than the old
# find_element_by_xpath helper, which Selenium 4.3+ no longer provides.
try:
    # ---- Disabled walkthrough: search a job site, read a new window ----
    # web.get('http://www.lagou.com')
    #
    # # Maximise the browser window.
    # web.maximize_window()
    #
    # # Locate an element and click it.
    # el = web.find_element(By.XPATH, '....')
    # el.click()
    #
    # # Locate the input box, type "python", then press Enter to search.
    # web.find_element(By.XPATH, '....').send_keys('python', Keys.ENTER)
    #
    # # Find where the data lives and extract it: every <li> on the page.
    # lis = web.find_elements(By.XPATH, '..../li')
    # for li in lis:
    #     jobName = li.find_element(By.TAG_NAME, 'h3').text
    #     jobPrice = li.find_element(By.XPATH, './..../span').text
    #     companyName = li.find_element(By.XPATH, './..../a').text
    #     print(jobName, jobPrice, companyName)
    #
    # # How to extract data from a newly opened window.
    # # Note: Selenium does NOT switch to a new window by default.
    # web.switch_to.window(web.window_handles[-1])  # [-1] is the last window
    #
    # # Extract data in the new window.
    # jobDetail = web.find_element(By.XPATH, '//..../div').text
    # print(jobPrice)  # NOTE(review): probably meant jobDetail - confirm
    #
    # # Close the child window.
    # web.close()
    # # Point Selenium back at the original window.
    # web.switch_to.window(web.window_handles[0])

    # ---- Active example: the page contains an <iframe> ----
    web.get('http://www.91kanju.com/vodd-play/541-2-1.html')

    # To read iframe content, first locate the iframe element, then switch
    # the driver's context into it; only then can its data be retrieved.
    iframe = web.find_element(By.XPATH, '//*[@id="player_iframe"]')
    web.switch_to.frame(iframe)

    # Grab a piece of data from inside the iframe.
    txt = web.find_element(By.XPATH, '//*[@id="main"]/h3[1]').text
    print(txt)

    # Leave the iframe and return to the top-level document.
    web.switch_to.default_content()
finally:
    # Shut the browser and driver down even if a lookup above fails.
    web.quit()
来源:B站视频
二、基本用法
# Selenium basics: the element-locating strategies, demonstrated on Baidu.
# Official docs: http://selenium-python.readthedocs.io/locating-elements.html
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # lookup strategy: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard key constants
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load

driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)

try:
    # Loading the page inside the try block guarantees the browser is shut
    # down even when navigation itself fails.
    driver.get('https://www.baidu.com')

    # =============== All strategies ===============
    # 1. By.ID                 (formerly find_element_by_id)
    # 2. By.LINK_TEXT          (formerly find_element_by_link_text)
    # 3. By.PARTIAL_LINK_TEXT  (formerly find_element_by_partial_link_text)
    # 4. By.TAG_NAME           (formerly find_element_by_tag_name)
    # 5. By.CLASS_NAME         (formerly find_element_by_class_name)
    # 6. By.NAME               (formerly find_element_by_name)
    # 7. By.CSS_SELECTOR       (formerly find_element_by_css_selector)
    # 8. By.XPATH              (formerly find_element_by_xpath)
    # Notes:
    # 1. The find_element(By.ID, 'kw') form is used throughout because the
    #    find_element_by_* helpers are gone in Selenium 4.3+.
    # 2. find_elements(...) looks up multiple elements; the result is a list.

    # =============== Example usage ===============
    # 1. By.ID
    print(driver.find_element(By.ID, 'kw'))

    # 2. By.LINK_TEXT
    # login = driver.find_element(By.LINK_TEXT, '登录')
    # login.click()

    # 3. By.PARTIAL_LINK_TEXT
    login = driver.find_elements(By.PARTIAL_LINK_TEXT, '录')[0]
    login.click()

    # 4. By.TAG_NAME
    print(driver.find_element(By.TAG_NAME, 'a'))

    # 5. By.CLASS_NAME
    button = wait.until(
        EC.element_to_be_clickable((By.CLASS_NAME, 'tang-pass-footerBarULogin'))
    )
    button.click()

    # 6. By.NAME
    input_user = wait.until(EC.presence_of_element_located((By.NAME, 'userName')))
    input_pwd = wait.until(EC.presence_of_element_located((By.NAME, 'password')))
    commit = wait.until(
        EC.element_to_be_clickable((By.ID, 'TANGRAM__PSP_10__submit'))
    )

    input_user.send_keys('18611453110')
    input_pwd.send_keys('xxxxxx')
    commit.click()

    # 7. By.CSS_SELECTOR
    driver.find_element(By.CSS_SELECTOR, '#kw')

    # 8. By.XPATH (no example in this script)

    # Pause before shutting down, presumably so the result can be observed.
    time.sleep(5)

finally:
    # quit() ends the whole driver session; close() would only close the
    # current window and could leave the driver process running.
    driver.quit()
三、xpath
# XPath lookups with Selenium against the Scrapy selectors sample page.
# Official docs: http://selenium-python.readthedocs.io/locating-elements.html
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # lookup strategy: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard key constants
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load

# webdriver.PhantomJS is no longer available in Selenium 4; headless Chrome
# is the replacement for a windowless browser.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)

try:
    # Loading the page inside the try block guarantees the browser is shut
    # down even when navigation itself fails.
    driver.get('https://doc.scrapy.org/en/latest/_static/selectors-sample1.html')
    # wait = WebDriverWait(driver, 3)
    driver.implicitly_wait(3)  # use an implicit wait

    # ---- "//" versus "/" ----
    # A leading // searches the whole document; the / after body means a
    # direct child of body, so this lookup would find nothing and raise:
    # driver.find_element(By.XPATH, '//body/a')

    # A leading // searches the whole document; the // after body means any
    # descendant of body.
    driver.find_element(By.XPATH, '//body//a')
    driver.find_element(By.CSS_SELECTOR, 'body a')

    # ---- Take the n-th one ----
    res1 = driver.find_elements(By.XPATH, '//body//a[1]')  # the first <a> tag
    print(res1[0].text)

    # ---- Lookup by attribute ----
    # According to the original author, the three lookups below find the
    # same element.
    res1 = driver.find_element(By.XPATH, '//a[5]')
    res2 = driver.find_element(By.XPATH, '//a[@href="image5.html"]')
    res3 = driver.find_element(By.XPATH, '//a[contains(@href,"image5")]')  # fuzzy match
    print('==>', res1.text)
    print('==>', res2.text)
    print('==>', res3.text)

    # ---- Other ----
    res1 = driver.find_element(By.XPATH, '/html/body/div/a')
    print(res1.text)

    # The <a> tag whose child <img> has src "image3_thumb.jpg".
    res2 = driver.find_element(By.XPATH, '//a[img/@src="image3_thumb.jpg"]')
    print(res2.tag_name, res2.text)

    # An <input> with name "continue" AND type "button", then any tag with
    # the same two attributes.
    # NOTE(review): these raise NoSuchElementException if the page has no
    # matching element - confirm the sample page actually contains one.
    res3 = driver.find_element(By.XPATH, "//input[@name='continue'][@type='button']")
    res4 = driver.find_element(By.XPATH, "//*[@name='continue'][@type='button']")

    # Pause before shutting down, presumably so the output can be inspected.
    time.sleep(5)

finally:
    # quit() ends the whole driver session; close() would only close the
    # current window and could leave the driver process running.
    driver.quit()
四、获取标签属性
# Getting tag attributes: read the attributes and properties of a located
# element with Selenium.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # lookup strategy: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard key constants
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for elements to load

browser = webdriver.Chrome()

try:
    browser.get('https://www.amazon.cn/')

    # Block (up to 10 s) until the image container is present in the DOM.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'cc-lm-tcgShowImgContainer')))

    tag = browser.find_element(By.CSS_SELECTOR, '#cc-lm-tcgShowImgContainer img')

    # Read an attribute of the tag.
    print(tag.get_attribute('src'))

    # Element reference, position, tag name and size (for reference only).
    # tag.id is Selenium's internal element reference, not the HTML "id"
    # attribute; use tag.get_attribute('id') for the latter.
    print(tag.id)
    print(tag.location)
    print(tag.tag_name)
    print(tag.size)

finally:
    # Always end the session, even when the wait above times out; quit()
    # closes every window and stops the driver, whereas close() only closes
    # the current window.
    browser.quit()