zoukankan html css js c++ java

利用 Python 爬取网页图片

"""利用python爬取网页图片"""
import requests
import urllib
from bs4 import BeautifulSoup
import json

# Browser-like request headers used by the HTTP requests below.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/83.0.4103.61 Safari/537.36'
    ),
}
# Fetch the recommendation page and print every <img> tag found in its HTML.
url = 'http://pic.sogou.com/pics/recommend?category=明星'
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, features='html.parser')
print(soup.select('img'))


def getSogouImg(category, length, path):
    """Download Sogou recommended pictures of a category to local files.

    Queries the Sogou ``getAllRecomPicByTag.jsp`` endpoint for ``length``
    items of ``category``, collects each item's ``ori_pic_url`` and saves
    the pictures as ``<path>0.jpg``, ``<path>1.jpg``, ... in response order.
    The request is sent with the module-level ``headers``.

    Args:
        category: Category name placed in the ``category`` query parameter.
        length: Number of items to request (the ``len`` query parameter).
        path: Prefix prepended to every file name; end it with a path
            separator to save into a directory. The directory part of the
            prefix is created if it does not exist yet.

    Raises:
        KeyError: If the response JSON lacks ``all_items`` or an item
            lacks ``ori_pic_url``.
    """
    # Imported explicitly: a bare ``import urllib`` does not by itself load
    # the ``urllib.request`` submodule used for the downloads below.
    import os
    import urllib.request

    # Parenthesized so the concatenation can legally span several lines.
    url = (
        'https://pic.sogou.com/pics/channel/getAllRecomPicByTag.jsp?category='
        + category + '&tag=全部&start=0&len='
        + str(length) + '&width=1920&height=1080'
    )
    imgs = requests.get(url=url, headers=headers)
    jd = json.loads(imgs.text)
    imgs_url = [item['ori_pic_url'] for item in jd['all_items']]

    # urlretrieve does not create missing directories, so make sure the
    # directory part of the prefix exists before the first download.
    target_dir = os.path.dirname(path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    for m, img_url in enumerate(imgs_url):
        print('------' + str(m) + '.jpg------' + '    Downloading...')
        urllib.request.urlretrieve(img_url, path + str(m) + '.jpg')
    print('Download complete!')

# Script entry: request 3000 items of the '壁纸' category and save them
# under d:/Download/壁纸/.
getSogouImg(category='壁纸', length=3000, path='d:/Download/壁纸/')

转载自：https://www.cnblogs.com/dearvee/category/966215.html

查看全文

相关阅读:
如何制作静、动态库
各种时间函数的恩与怨
一文看懂Vim操作
如何避免内存泄漏
和Leon一起学Vim
shell的输入输出重定向
和Leon一起从头学Git(六)
和Leon一起从头学Git(五)
深入理解Linux高端内存
和Leon一起从头学Git(四)

原文地址：https://www.cnblogs.com/memory-ccy/p/13372121.html