zoukankan      html  css  js  c++  java
  • python爬虫之爬取网站图片

    #!/usr/local/bin/python3.7
    
    import re
    import urllib.request
    import urllib.parse
    import os
    import time
    
    """
    @File    :   qiushibaike.py
    @Time    :   2020/04/06
    @Author  :   Mozili
    
    """
    
    """
    爬取糗事百科中指定页码的图片
    
    """
    def handler_request(url):
        """Fetch ``url`` and return the response body as text.

        The request is sent with a desktop Safari User-Agent header.

        Args:
            url: Address of the page to fetch.

        Returns:
            The response body as ``str``, decoded with the charset declared
            in the response's Content-Type header, or UTF-8 when the
            response declares none.

        Raises:
            urllib.error.URLError: If the request cannot be completed.
            UnicodeDecodeError: If the body is not valid in the chosen charset.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1 Safari/605.1.15',
        }
        # Build the request.
        req = urllib.request.Request(url=url, headers=headers)
        # Send it; the context manager closes the response even when
        # reading or decoding fails, so the connection is not leaked.
        with urllib.request.urlopen(req) as rep:
            # Honour the server-declared charset instead of assuming UTF-8.
            charset = rep.headers.get_content_charset() or 'utf-8'
            return rep.read().decode(charset)
    
    def download_image(content, save_dir='Reptile/images', delay=1):
        """Download every thumbnail image referenced in ``content``.

        Images are located with a regular expression that captures the
        ``src`` of the first ``<img>`` inside each ``<div class="thumb">``.

        Args:
            content: HTML text of a listing page.
            save_dir: Directory the images are written to. It is created,
                including missing parent directories, when at least one
                image is found.
            delay: Seconds to wait before each download.

        Returns:
            None. Each image is saved as ``save_dir/<file name from URL>``.

        Raises:
            urllib.error.URLError: If an image cannot be retrieved.
            OSError: If the directory or a file cannot be written.
        """
        pattern = re.compile(r'<div class="thumb">.*?<img src="(.*?)" .*?">.*?</div>', re.S)
        image_urls = pattern.findall(content)
        if not image_urls:
            return

        # Create the target directory once; makedirs also creates missing
        # parents such as "Reptile", which a plain mkdir cannot do.
        os.makedirs(save_dir, exist_ok=True)

        for image_url in image_urls:
            # Only protocol-relative URLs ("//host/path") need a scheme;
            # anything else is used exactly as it appears in the page.
            if image_url.startswith('//'):
                image_url = 'https:' + image_url
            # Take the file name from the URL path so a query string or
            # fragment never ends up in the local file name.
            image_name = os.path.basename(urllib.parse.urlsplit(image_url).path)
            if not image_name:
                # No usable file name (URL path ends with "/"): skip it.
                continue
            image_path = os.path.join(save_dir, image_name)
            print('图片{}开始下载....'.format(image_name))
            if delay:
                time.sleep(delay)
            # Save the response body straight to disk.
            urllib.request.urlretrieve(image_url, image_path)
    
    def main():
        """Prompt for a page range and download the images of every page in it.

        Two integers are read from standard input: the first and the last
        page number, both inclusive. Each page is fetched, its images are
        downloaded, and the script waits one second before the next page.
        """
        first_page = int(input('请输入起始页码:'))
        last_page = int(input('请输入结束页码:'))
        base_url = 'https://www.qiushibaike.com/imgrank/page/'

        page = first_page
        while page <= last_page:
            # Fetch the listing page, then download the images it links to.
            html = handler_request(base_url + str(page) + '/')
            download_image(html)
            print('第%s页下载结束...' % page)
            time.sleep(1)
            # Two blank lines separate the output of consecutive pages.
            print(end='\n\n')
            page += 1
    
    # Run the scraper only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == "__main__":
        main()
  • 相关阅读:
    存储过程语法二
    存储过程语法一
    存储过程的优点
    .NET中Redis安装部署及使用方法简介
    UEditor富文本web编辑器
    未找到与约束contractname Microsoft.VisualStudio.Utilities.IContentTypeRegistryService
    comet 推送消息到客户端
    文本框 只能输入数字和小数点验证
    asp.net Cache
    Windows10放开Administrator权限
  • 原文地址:https://www.cnblogs.com/lxmtx/p/12643547.html
Copyright © 2011-2022 走看看