zoukankan      html  css  js  c++  java
  • python爬虫demo01

    python爬虫demo01

    复制代码
     1 import requests, json, time, sys
     2 from bs4 import BeautifulSoup
     3 from contextlib import closing
     4 
     # Sample direct image URL on the xiaozhu static host; the functions in this
     # snippet take their own ``url`` argument and do not read this global.
     url = 'https://image.xiaozhustatic1.com/12/9,0,27,3473,1800,1200,d064ccfb.jpg'

     # Desktop Chrome User-Agent string sent with every request made below.
     _USER_AGENT = (
         'Mozilla/5.0 (Windows NT 10.0; WOW64) '
         'AppleWebKit/537.36 (KHTML, like Gecko) '
         'Chrome/67.0.3396.99 Safari/537.36'
     )
     headers = {'User-Agent': _USER_AGENT}

     # Listing page that the script entry point scrapes for image URLs.
     url2 = 'http://bj.xiaozhu.com/'
    11 
    12 
    def get_img_src_list(url, timeout=10):
        """
        Collect the lazy-load image URLs from a listing page.

        Fetches ``url`` with the module-level ``headers``, parses the response
        text with BeautifulSoup (``lxml`` parser) and reads the ``lazy_src``
        attribute of every ``<img>`` matched by the CSS selector
        ``#page_list > ul > li > a > img``.

        :param url: address of the listing page to scrape
        :param timeout: seconds handed to ``requests.get``; without a timeout
            the request can block indefinitely on an unresponsive server
        :return: list of ``lazy_src`` values in document order; images whose
            attribute is missing or empty are skipped
        """
        res = requests.get(url, headers=headers, timeout=timeout)
        res_data = BeautifulSoup(res.text, 'lxml')
        imgs = res_data.select('#page_list > ul > li > a > img')
        # Tag.get() yields None when the attribute is absent; drop those so the
        # caller never receives a value it cannot download.
        return [src for src in (img.get('lazy_src') for img in imgs) if src]
    29 
    30 
    def downloadPic(url, pic_name, timeout=10):
        """
        Download one image and save it as ``<pic_name>.jpg`` in the working directory.

        The response is streamed in 1 KiB chunks so the whole image is never
        held in memory. The HTTP status code is not checked: whatever body the
        server returns is written to the file.

        :param url: address of the image to fetch
        :param pic_name: integer used for the file name (formatted with ``%d``)
        :param timeout: seconds handed to ``requests.get``; without a timeout
            the request can block indefinitely on an unresponsive server
        :return: None
        """
        res = requests.get(url, headers=headers, stream=True, timeout=timeout)
        # 'wb' truncates any existing file. The previous 'ab+' mode appended to
        # it, so running the script twice produced corrupted, doubled images.
        with closing(res) as r, open('%d.jpg' % pic_name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # skip empty keep-alive chunks
                    f.write(chunk)
        print('下载{}.jpg成功!'.format(pic_name))
    46 
    if __name__ == '__main__':
        # Scrape the listing page, then save every image found as <index>.jpg.
        # enumerate() supplies the running index; the former manual ``i += 1``
        # had no effect because range() rebinds the loop variable each pass.
        for index, img_src in enumerate(get_img_src_list(url2)):
            print(img_src)
            downloadPic(img_src, index)
    复制代码
  • 相关阅读:
    JSON与JSONP的区别
    BFC(块级格式上下文)
    面试题--新
    javascript 类数组对象
    WebP 图片实践之路
    HTTP,HTTP2.0,SPDY,HTTPS你应该知道的一些事
    前端面试题目
    JS 中的事件设计
    博客声明
    1.2 线性表的链式表示
  • 原文地址:https://www.cnblogs.com/valorchang/p/11476835.html
Copyright © 2011-2022 走看看