  • Scraping images with Python

    1. Given an image URL, download it directly to a local file

    import requests


    def get_gif(url, a):
        response = requests.get(url)
        # Folder path on the local machine; the file is saved as a.gif.
        # A raw string is needed so the backslashes in the Windows path
        # are not treated as escape sequences.
        with open(r"C:\Users\acm\Desktop\新建文件夹\%d.gif" % a, 'wb') as file:
            file.write(response.content)


    if __name__ == '__main__':
        # URL of the animated image to download
        url = 'http://game.gtimg.cn/images/nz/cp/a20201117decbeta/m1-prop1.gif'
        get_gif(url, 1)
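    If the GIF is large, the whole file does not have to be buffered in memory: requests can stream the response. The sketch below shows the same download using stream=True and iter_content; the folder path, chunk size, and timeout are illustrative assumptions, not part of the original script.

    import os
    import requests


    def get_gif_streamed(url, a, folder=r"C:\Users\acm\Desktop\新建文件夹"):
        # Stream the response and write it in chunks instead of holding
        # the entire file in memory.
        with requests.get(url, stream=True, timeout=10) as response:
            response.raise_for_status()
            with open(os.path.join(folder, "%d.gif" % a), 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)


    if __name__ == '__main__':
        get_gif_streamed('http://game.gtimg.cn/images/nz/cp/a20201117decbeta/m1-prop1.gif', 1)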

    2. Given a page URL, scrape all images on the page by matching a pattern in the HTML

    import re
    import requests


    def get_url(url):
        # Fetch the page
        response = requests.get(url)
        response.encoding = 'utf-8'
        # print(response.text)
        # Regular expression for the image addresses; the captured group in
        # parentheses is the URL. Adjust the pattern to match the page's HTML.
        url_addr = r'<img src="(.*?)" alt=".*?">'
        # Find every image link on the page that matches the pattern
        url_list = re.findall(url_addr, response.text)
        return url_list


    def get_photo(url, a):
        response = requests.get(url)
        # Save locally as a.jpg (raw string so the Windows path is not
        # mangled by backslash escapes)
        with open(r"C:\Users\acm\Desktop\新建文件夹\%d.jpg" % a, 'wb') as file:
            file.write(response.content)


    if __name__ == '__main__':
        # URL of the page to scrape
        url = 'http://www.netbian.com/'
        url_list = get_url(url)
        a = 1
        for url in url_list:
            get_photo(url, a)
            a += 1
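    The script above imports BeautifulSoup but extracts links with a regular expression. The same extraction can be done with the HTML parser instead, which does not depend on attribute order. A minimal sketch, assuming the image addresses sit in the src attribute of <img> tags (get_url_bs is a hypothetical helper name):

    import requests
    from bs4 import BeautifulSoup


    def get_url_bs(url):
        response = requests.get(url)
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'html.parser')
        # Collect the src attribute of every <img> tag that has one
        return [img['src'] for img in soup.find_all('img') if img.get('src')]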

    3. Adding request headers and directory/file handling

    import re
    import os
    import requests

    # Browser-like User-Agent so the site does not reject the request
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
    }

    if __name__ == '__main__':
        url = 'http://www.netbian.com/weimei/'
        # headers must be passed as a keyword argument; the second positional
        # argument of requests.get() is params, not headers
        response = requests.get(url, headers=headers)
        response.encoding = 'utf-8'
        urls = re.findall('img src="(.*?)" alt=".*?"', response.text)
        print(urls)
        dir_name = 'photos'
        # Create the output directory once, before the download loop
        if not os.path.exists(dir_name):
            os.mkdir(dir_name)
        a = 1
        for i in urls:
            response = requests.get(i, headers=headers)
            file_name = str(a) + '.jpg'
            with open(dir_name + '/' + file_name, 'wb') as file:
                file.write(response.content)
            a += 1
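    A failed request for one image should not abort the whole loop, and some pages use relative src values. Below is a sketch of the same download loop with per-image error handling and urljoin; the timeout value and the skip-and-continue behaviour are assumptions added for illustration.

    import os
    import re
    import requests
    from urllib.parse import urljoin

    headers = {'User-Agent': 'Mozilla/5.0'}

    if __name__ == '__main__':
        url = 'http://www.netbian.com/weimei/'
        response = requests.get(url, headers=headers, timeout=10)
        response.encoding = 'utf-8'
        urls = re.findall('img src="(.*?)" alt=".*?"', response.text)
        os.makedirs('photos', exist_ok=True)
        for a, src in enumerate(urls, start=1):
            try:
                # Resolve relative src values against the page URL
                img = requests.get(urljoin(url, src), headers=headers, timeout=10)
                img.raise_for_status()
            except requests.RequestException as err:
                print('skipping %s: %s' % (src, err))
                continue
            with open(os.path.join('photos', '%d.jpg' % a), 'wb') as file:
                file.write(img.content)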
  • Original article: https://www.cnblogs.com/cherish-lin/p/14073901.html