import os
import re

import requests


def get_url(page, headers):
    url = 'http://www.zbjuran.com/mei/xinggan/list_13_%s.html' % page
    data = requests.get(url, headers=headers).text
    # Each list entry looks like:
    # <div class="name"><a target="_blank" href="..." title="...">...</a></div>
    data_use = re.findall('<div class="name"><a target="_blank" href=".*?" title=".*?</a></div>', data)
    for use in data_use:
        link = 'http://www.zbjuran.com/' + use.split('href="')[1].split('" title')[0]
        links.append(link)
        title = use.split('title="')[1].split('">')[0]
        titles.append(title)
        mkpath = '/Users/b1ancheng/mzpc/%s' % title

        def get_pic():
            url_data = requests.get(link, headers=headers).text
            print(link)
            try:
                # Total number of pages in this album, parsed from
                # <div class="page"><li><a>共N页: ...
                link_page = int(url_data.split('<div class="page"><li><a>共')[1].split('页:')[0])
            except Exception as e1:
                # Page count not found: report, remove the empty directory and give up on this album.
                print(e1)
                os.rmdir(mkpath)
                return
            for i in range(1, link_page + 1):
                print('Downloading page %s' % i)
                try:
                    # Per-page URL: insert "_<i>" in front of the trailing ".html".
                    pic_url = (link[:-5] + '_%s' + link[-5:]) % i
                    print(pic_url)
                    page_html = requests.get(pic_url, headers=headers).text
                    try:
                        pic_data_link = 'http://www.zbjuran.com' + page_html.split('<img alt="" src="')[1].split('" /></div>')[0]
                    except Exception as otherdown:
                        # Some pages omit the alt attribute; fall back to the plain <img src="..."> form.
                        print(otherdown)
                        pic_data_link = 'http://www.zbjuran.com' + page_html.split('<img src="')[1].split('" /></div>')[0]
                    with open('/Users/b1ancheng/mzpc/%s/%s_%s.JPG' % (title, title, i), 'wb') as pic_download:
                        pic_download.write(requests.get(pic_data_link).content)
                except Exception as error:
                    print(error)
                    continue

        # Create the album directory (could be folded into get_pic), then download into it;
        # albums that already exist on disk are skipped instead of aborting the whole page.
        if not os.path.exists(mkpath):
            os.makedirs(mkpath)
            get_pic()


if __name__ == '__main__':
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
        'Host': 'www.zbjuran.com',
        'Cookie': 'UM_distinctid=15ef9964528386-07264d76850875-31657c00-13c680-15ef9964529361; CNZZDATA1264461841=1179231757-1507422986-null%7C1508056601'
    }
    links = []
    titles = []
    for page in range(1, 88):
        get_url(page, headers=headers)
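For anyone puzzled by the per-page URL construction above, here is a minimal standalone sketch of the string slicing; the detail-page link used below is hypothetical and only illustrates the pattern.

# Minimal sketch of the pagination URL pattern used in get_pic().
# The detail link is a made-up example, not a real album URL.
link = 'http://www.zbjuran.com/mei/xinggan/201710/12345.html'

# link[:-5] drops the trailing ".html"; link[-5:] is ".html" itself,
# so page i becomes ".../12345_<i>.html".
for i in range(1, 4):
    print((link[:-5] + '_%s' + link[-5:]) % i)
# -> .../12345_1.html, .../12345_2.html, .../12345_3.html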
I'd welcome your suggestions so we can improve together.