目的:
馋她
效果:
实现:
import os

import requests

from Common import Common


class Picture(Common):
    """Download the images shown on a Sina Weibo profile page.

    Original header: "Python scraper for Sina Weibo Fukada pictures",
    written 2020/03/08 15:57.
    """

    def get_imgs(self):
        """Open the profile page, scroll to load images and save them.

        Every ``<img>`` matched by the feed XPath is downloaded with
        ``requests`` and written to ``./image/<title>[<n>].jpg`` where
        ``<title>`` is the text of the page header element and ``<n>`` is
        the 1-based position of the image in the matched list.

        Images without a ``src`` attribute, and images whose download
        fails, are skipped so that one bad item does not abort the run.
        """
        url = 'https://weibo.com/232655234?profile_ftype=1&is_all=1#1583650565067'
        # Load the page in the Chrome instance created by Common.__init__.
        self.open_url(url)
        # Scroll down step by step so that more of the feed gets loaded.
        self.scroll_top(0, 800, 20)
        # Header text of the page; used as the file-name prefix.
        img_title = self.get_text(
            "xpath",
            '//*[@id="Pl_Official_Headerv6__1"]/div[1]/div/div[2]/div[2]/h1')
        # All image elements currently present in the feed.
        img_list = self.locateElement(
            "xpath_s",
            '//*[@id="Pl_Official_MyProfileFeed__20"]/div/div[*]/div[*]/div[*]/div[*]/div/ul/li[*]/img')

        out_dir = './image'
        # open() does not create missing directories, so make sure the
        # target folder exists before the first write.
        os.makedirs(out_dir, exist_ok=True)

        for index, item in enumerate(img_list):
            # URL of the picture itself.
            item_href = item.get_attribute("src")
            if not item_href:
                # Nothing to download for an <img> without a src.
                continue
            # Name of the file that will be written.
            img_name = img_title + "[" + str(index + 1) + "].jpg"
            try:
                # A timeout keeps a stalled connection from hanging the run.
                res = requests.get(item_href, timeout=30)
                # Do not store an HTTP error page as if it were a picture.
                res.raise_for_status()
            except requests.RequestException as exc:
                print(img_name + "下载失败----" + str(exc))
                continue
            print(img_name + "开始写入----" + item_href)
            with open(os.path.join(out_dir, img_name), 'wb') as f:
                f.write(res.content)
            print(img_name + "第" + str(index + 1) + "张图片下载完成")


if __name__ == '__main__':
    mn = Picture()
    mn.get_imgs()
公共模块(Common.py):
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By


class Common(object):
    """Small convenience wrapper around a Selenium Chrome driver."""

    # Short locator names accepted by locateElement() mapped to Selenium
    # ``By`` strategies.  Appending "_s" to a name (e.g. "xpath_s") asks for
    # every matching element instead of only the first one.
    _STRATEGIES = {
        'css': By.CSS_SELECTOR,
        'name': By.NAME,
        'class': By.CLASS_NAME,
        'id': By.ID,
        'tag': By.TAG_NAME,
        'xpath': By.XPATH,
        'text': By.LINK_TEXT,
        'liketext': By.PARTIAL_LINK_TEXT,
    }

    def __init__(self):
        """Start Chrome, set a 5 second implicit wait and maximize it."""
        self.chrome = webdriver.Chrome()
        self.chrome.implicitly_wait(5)
        self.chrome.maximize_window()

    def open_url(self, url):
        """Navigate to *url* and raise the implicit wait to 10 seconds."""
        self.chrome.get(url)
        self.chrome.implicitly_wait(10)

    def locateElement(self, locate_type, value):
        """Find one element or a list of elements.

        :param locate_type: one of ``css``, ``name``, ``class``, ``id``,
            ``tag``, ``xpath``, ``text`` (link text) or ``liketext``
            (partial link text); add the suffix ``_s`` to get a list of
            all matches.
        :param value: the selector / name / XPath / text to look for.
        :return: a single element, a list of elements for ``*_s`` types,
            or ``None`` when *locate_type* is not recognised.
        """
        plural = locate_type.endswith('_s')
        key = locate_type[:-2] if plural else locate_type
        by = self._STRATEGIES.get(key)
        if by is None:
            # Unknown locator names yield None instead of failing on an
            # unassigned local variable.
            return None
        # find_element(s)(by, value) is used because the per-strategy
        # find_element_by_* helpers are not available in Selenium >= 4.3.
        if plural:
            return self.chrome.find_elements(by, value)
        return self.chrome.find_element(by, value)

    def click(self, locate_type, value):
        """Click the element found by ``locateElement``."""
        el = self.locateElement(locate_type, value)
        el.click()

    def input_data(self, locate_type, value, date):
        """Type *date* (the text to enter) into the located element."""
        el = self.locateElement(locate_type, value)
        el.send_keys(date)

    def get_text(self, locate_type, value):
        """Return the ``text`` of the located element."""
        el = self.locateElement(locate_type, value)
        return el.text

    def get_attr(self, locate_type, value, attrname):
        """Return attribute *attrname* of the located element."""
        el = self.locateElement(locate_type, value)
        return el.get_attribute(attrname)

    def scroll_top(self, x, y, num):
        """Scroll the window *num* times, *y* pixels further each time.

        Waits 5 seconds first, then calls ``window.scrollTo(x, k * y)``
        for ``k = 1 .. num`` with a 0.5 second pause after every step.
        The current vertical offset is printed before each scroll.
        """
        index = y
        sleep(5)
        for _ in range(num):
            print(index)
            js = "window.scrollTo({},{})".format(x, index)
            self.chrome.execute_script(js)
            index = index + y
            sleep(0.5)

    def close_win(self):
        """Quit the browser session; safe to call more than once."""
        chrome = getattr(self, 'chrome', None)
        if chrome is not None:
            # Forget the driver first so a second call (or __del__) does
            # not try to talk to a session that is already gone.
            self.chrome = None
            chrome.quit()

    def __del__(self):
        """Best-effort shutdown of the browser when the object dies."""
        if getattr(self, 'chrome', None) is None:
            # __init__ failed or close_win() already ran: nothing to do.
            return
        try:
            sleep(1)
            # quit() rather than close() so the driver process ends too.
            self.close_win()
        except Exception:
            # A finalizer cannot propagate errors; the session may already
            # be dead or the interpreter may be shutting down.
            pass