zoukankan      html  css  js  c++  java
  • 利用python爬取点小图片,满足私欲(爬虫)

     1 import requests
     2 import re
     3 import os,sys
     4 
     5 
     6 
     7 
     8 def get_url(page,headers):
     9     url='http://www.zbjuran.com/mei/xinggan/list_13_%s.html'%(page)
    10     data=requests.get(url,headers=headers).text
    11     data_use=re.findall('<div class="name"><a target="_blank" href=".*?" title=".*?</a></div>',data)
    12     for use in data_use:
    13         link='http://www.zbjuran.com/'+use.split('href="')[1].split('" title')[0]
    14         links.append(link)
    15         title=use.split('title="')[1].split('">')[0]
    16         titles.append(title)
    17         mkpath='/Users/b1ancheng/mzpc/%s'%title
    18         def get_pic():
    19             url_data=requests.get(link).text
    20             print(link)
    21             try:
    22                 link_page = int(url_data.split('<div class="page"><li><a>共')[1].split('页:')[0])
    23                 for i in range(1, link_page + 1):
    24                     print('正在下载第%s页'%i)
    25                     try:
    26                         pic_url = (link[:-5] + '_%s' + link[-5:])%i
    27                         print(pic_url)
    28                         try:
    29                             pic_data_link='http://www.zbjuran.com'+requests.get(pic_url,headers=headers).text.split('<img alt="" src="')[1].split('" /></div>')[0]
    30                             with open('/Users/b1ancheng/mzpc/%s/%s_%s.JPG' % (title, title,i),'wb') as pic_download:
    31                                 pic_download.write(requests.get(pic_data_link).content)
    32                         except Exception as otherdown:
    33                             print(otherdown)
    34                             pic_data_link = 'http://www.zbjuran.com' + requests.get(pic_url, headers=headers).text.split('<img src="')[1].split('" /></div>')[0]
    35                             with open('/Users/b1ancheng/mzpc/%s/%s_%s.JPG' % (title, title,i),'wb') as pic_download:
    36                                 pic_download.write(requests.get(pic_data_link).content)
    37                             continue
    38                     except Exception as error:
    39                         print(error)
    40                         continue
    41             except Exception as e1:
    42                 print(e1)
    43                 os.rmdir(mkpath)
    44                 pass
    45         # 创建目录          //可修改进get_pic
    46         isExists = os.path.exists(mkpath)
    47         if not isExists:
    48             os.makedirs(mkpath)
    49             get_pic()
    50         else:
    51             return False
    52 if __name__ == '__main__':
    53     headers = {
    54         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
    55         'Host': 'www.zbjuran.com',
    56         'Cookie': 'UM_distinctid=15ef9964528386-07264d76850875-31657c00-13c680-15ef9964529361; CNZZDATA1264461841=1179231757-1507422986-null%7C1508056601'
    57     }
    58     links = []
    59     titles = []
    60     for page in range(1,88):
    61         get_url(page,headers=headers)

    望兄多提意见,相互进步

  • 相关阅读:
    一个简单的linux线程池(转-wangchenxicool)
    Linux下获得系统时间的C语言实现
    C语言实现简单线程池(转-Newerth)
    C语言实现Web客户端(转-kungstriving)
    C语言实现的Web服务器(转-kungstriving)
    linux和window下mkdir函数问题(转-锦曦月)
    linux C 获取当前目录的实现(转-Blossom)
    linux C之access函数(转-追梦的小鸟)
    Linux C 创建目录函数mkdir相关(转-清新居士)
    50个C/C++源代码网站(转-清风小阁)
  • 原文地址:https://www.cnblogs.com/b1ancheng/p/7671148.html
Copyright © 2011-2022 走看看