1、地址
http://www.jder.net/meizi/
代码:
#!/usr/bin/python3.6
# -*- coding: utf-8 -*-
"""Download every picture listed on http://www.jder.net/meizi/ into ./pic.

The listing page is fetched once; each ``div.picture-box`` entry is expected
to hold a thumbnail ``<img>`` whose ``src`` embeds the full-size image URL
as its first query-string value (``...?src=<real url>&w=...``).
"""

import codecs
import os.path
import re
import time

import requests
from bs4 import BeautifulSoup

# Pretend to be a regular browser so the site serves the normal page.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"}

# Seconds to wait for the server before giving up; without a timeout
# ``requests`` may block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Characters that are path separators or are rejected in file names on
# common file systems; they are replaced before a title is used as a name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def download_page(url):
    """Return the decoded HTML text of *url*.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()  # do not parse an error page as if it were the listing
    return r.text


def download_pic(imgUrl):
    """Return the raw bytes served at *imgUrl*.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    r = requests.get(imgUrl, headers=headers, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()  # do not save an error page as an image file
    return r.content


def getFilePath(title, link):
    """Build the output path ``.//pic/<title>.<ext>`` for one image.

    The ``pic`` directory is created if missing. Characters in *title* that
    cannot appear in a file name are replaced with ``_``. The extension is
    whatever follows the last ``.`` in *link*.
    """
    pic = './/pic'
    os.makedirs(pic, exist_ok=True)
    safe_title = _UNSAFE_FILENAME_CHARS.sub('_', str(title))
    return pic + '/{}.{}'.format(safe_title, link.split('.')[-1])


def _extract_real_link(src):
    """Return the value between the first ``=`` and the following ``&``.

    If *src* has no ``=`` it is returned unchanged; if there is no ``&``
    after the ``=``, everything up to the end of the string is returned
    (slicing with ``find`` results would silently drop the last character).
    """
    _, sep, rest = src.partition('=')
    if not sep:
        return src
    return rest.partition('&')[0]


def _find_thumbnail(box):
    """Return the ``<img>`` tag inside *box*, or ``None`` if the markup differs."""
    holder = box.find(class_='picture-img')
    a_tag = holder.find('a') if holder is not None else None
    return a_tag.find('img') if a_tag is not None else None


def get_content(html):
    """Parse the listing *html* and save each referenced image under ./pic.

    Entries with unexpected markup are skipped. A failed download or write
    is reported on stdout and does not stop the remaining downloads.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for box in soup.find_all('div', class_='picture-box'):
        img = _find_thumbnail(box)
        if img is None:
            continue
        link = img.get('src')
        if not link:
            continue
        realLink = _extract_real_link(link)
        # Without an alt text every file would be named "None"; fall back to
        # the file name embedded in the URL so images do not overwrite each other.
        title = img.get('alt') or realLink.rsplit('/', 1)[-1].rsplit('.', 1)[0]
        try:
            data = download_pic(realLink)
            with open(getFilePath(title, realLink), 'wb') as f:
                f.write(data)
        except (requests.RequestException, OSError) as err:
            print('skipped {}: {}'.format(realLink, err))
        time.sleep(1)  # be polite: at most one image request per second


def main():
    """Fetch the listing page and download all pictures it references."""
    url = 'http://www.jder.net/meizi/'
    html = download_page(url)
    get_content(html)


if __name__ == '__main__':
    main()
附:
BeautifulSoup 使用文档:
https://www.crummy.com/software/BeautifulSoup/bs4/doc.zh/#