ε=(´ο`*)))唉 持续更新吧...
11.16更新
保存网页上的图片，并按指定的名称命名
# -*- coding:utf-8 -*-
"""Download the avatars listed on a Taobao "mm" top-list page.

Every avatar found on the requested page is written to the current working
directory as ``<name>.jpg``.  The code runs on both Python 2 and Python 3.
"""
import re
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib2 import urlopen


class Spider(object):
    """Scraper for the top-list pages served under ``siteURL``."""

    # Matches one list item: group 1 is the avatar ``src``, group 2 the name.
    # re.S lets ``.*?`` run across the line breaks inside an item.
    _ITEM_RE = re.compile(
        '<div class="list-item".*?img src="(.*?)"'
        '.*?<a class="lady-name".*?target="_blank">(.*?)</a>',
        re.S
    )
    # Path separators, characters rejected by common file systems, controls.
    _UNSAFE_CHARS_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]')
    # A leading "scheme://" means the URL is already absolute.
    _SCHEME_RE = re.compile(r'^[A-Za-z][A-Za-z0-9+.-]*://')

    def __init__(self):
        self.siteURL = "https://mm.taobao.com/json/request_top_list.htm?page="

    def getPage(self, pageIndex):
        """Return the body of page ``pageIndex`` decoded from GBK.

        Args:
            pageIndex: Page number appended to ``siteURL``.

        Raises:
            Whatever ``urlopen`` raises on network/HTTP failure, and
            ``UnicodeDecodeError`` if the body is not valid GBK.
        """
        curURL = self.siteURL + str(pageIndex)
        # closing() releases the connection even if read()/decode() fails and
        # also works with Python 2 responses, which are not context managers.
        with closing(urlopen(curURL)) as response:
            return response.read().decode('gbk')

    def getPageImg(self, pageIndex):
        """Save the avatar of every list item found on page ``pageIndex``.

        Args:
            pageIndex: Page number to fetch via ``getPage``.

        Returns:
            The values returned by ``saveImg`` (the written filenames), in
            page order.  An empty list means the page had no matching items.
        """
        content = self.getPage(pageIndex)
        saved = []
        for src, name in self._ITEM_RE.findall(content):
            saved.append(self.saveImg(src, name))
        return saved

    def saveImg(self, url, name):
        """Download ``url`` and store it as ``<name>.jpg`` in the current dir.

        Args:
            url: Image address as found in the page; a scheme-less
                ``//host/path`` value is fetched over ``http``.
            name: Display name used for the file; characters that are unsafe
                in a filename are replaced (see ``_safe_filename``).

        Returns:
            The filename that was written.

        Raises:
            Whatever ``urlopen`` raises if the download fails (no file is
            created in that case), or ``IOError``/``OSError`` from writing.
        """
        # The u"" prefix keeps the message text (not bytes) on Python 2, where
        # the scraped name is a unicode object.
        print(u"正在保存 %s 的头像到当前文件夹..." % name)
        with closing(urlopen(self._image_url(url))) as response:
            data = response.read()
        filename = self._safe_filename(name) + ".jpg"
        with open(filename, 'wb') as out:
            out.write(data)
        return filename

    @staticmethod
    def _image_url(url):
        """Return a fetchable URL, adding ``http:`` only when no scheme is present."""
        url = url.strip()
        if Spider._SCHEME_RE.match(url):
            return url
        return "http:" + url

    @staticmethod
    def _safe_filename(name):
        """Return ``name`` with path separators and illegal characters replaced.

        The name comes from remote HTML, so it must not be able to point
        outside the current directory.  A name that ends up empty becomes
        ``"unnamed"``.
        """
        cleaned = Spider._UNSAFE_CHARS_RE.sub('_', name).strip()
        return cleaned or "unnamed"


if __name__ == "__main__":
    # Only scrape when run as a script, not when the module is imported.
    spider = Spider()
    spider.getPageImg(1)