```python
#!/usr/bin/python3
# -*- coding: utf-8 -*-
'''
Single-threaded version
'''
import os
import time
import uuid

import requests
from bs4 import BeautifulSoup


def out_wrapper(func):  # simple decorator that reports elapsed time
    def inner_wrapper():
        start_time = time.time()
        func()
        stop_time = time.time()
        print('Used time {}'.format(stop_time - start_time))
    return inner_wrapper


def save_flag(img, filename):  # save one image to disk
    os.makedirs('down_photos', exist_ok=True)
    path = os.path.join('down_photos', filename)
    with open(path, 'wb') as fp:
        fp.write(img)


def download_one(url):  # download a single image
    image = requests.get(url)
    save_flag(image.content, str(uuid.uuid4()))


def user_conf():  # return the URLs of 30 images
    url = 'https://unsplash.com/'
    resp = requests.get(url)
    soup = BeautifulSoup(resp.text, "html.parser")
    imgs = soup.find_all('img')
    ret = []
    for item in imgs:
        src = item.get("src")
        if src and src.endswith('80') and len(ret) < 30:
            ret.append(src)
    return ret


@out_wrapper
def download_many():
    zzr = user_conf()
    for item in zzr:
        download_one(item)


if __name__ == '__main__':
    download_many()
```
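Several of the concurrent variants below return a value from `download_many()`, but `out_wrapper` as written discards both arguments and return values. A minimal sketch of a more general timing decorator using `functools.wraps` (an optional refinement, not part of the original script):

```python
import functools
import time


def out_wrapper(func):  # timing decorator that forwards arguments and the return value
    @functools.wraps(func)
    def inner_wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        print('Used time {}'.format(time.time() - start_time))
        return result
    return inner_wrapper
```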
Concurrency variants:

1. Multiprocessing: the multiprocessing module, futures.ProcessPoolExecutor
2. Multithreading: the threading module, futures.ThreadPoolExecutor (map, and submit with futures.as_completed)
3. Coroutines: gevent, asyncio

Each variant below imports out_wrapper, download_one, user_conf and save_flag from the single-threaded script above, which should be saved as get_photos.py.
```python
from multiprocessing import Process

from get_photos import out_wrapper, download_one, user_conf


@out_wrapper
def download_many():
    zzr = user_conf()
    task_list = []
    for item in zzr:
        t = Process(target=download_one, args=(item,))
        t.start()
        task_list.append(t)
    for t in task_list:  # wait for every process to finish (so the elapsed time is meaningful)
        t.join()


if __name__ == '__main__':
    download_many()
```
```python
from concurrent import futures

from get_photos import out_wrapper, download_one, user_conf


@out_wrapper
def download_many():
    zzr = user_conf()
    with futures.ProcessPoolExecutor(len(zzr)) as executor:  # one worker process per URL
        res = executor.map(download_one, zzr)
    return len(list(res))


if __name__ == '__main__':
    download_many()
```
```python
import threading

from get_photos import out_wrapper, download_one, user_conf


@out_wrapper
def download_many():
    zzr = user_conf()
    task_list = []
    for item in zzr:
        t = threading.Thread(target=download_one, args=(item,))
        t.start()
        task_list.append(t)
    for t in task_list:
        t.join()


if __name__ == '__main__':
    download_many()
```
```python
from gevent import monkey
monkey.patch_all()  # must run before get_photos (and therefore requests) is imported

import gevent

from get_photos import out_wrapper, download_one, user_conf


@out_wrapper
def download_many():
    zzr = user_conf()
    jobs = [gevent.spawn(download_one, item) for item in zzr]
    gevent.joinall(jobs)


if __name__ == '__main__':
    download_many()
```
```python
import uuid
import asyncio

import aiohttp

from get_photos import out_wrapper, user_conf, save_flag


async def download_one(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            save_flag(await resp.read(), str(uuid.uuid4()))


@out_wrapper
def download_many():
    urls = user_conf()
    loop = asyncio.get_event_loop()
    to_do = [download_one(url) for url in urls]
    wait_coro = asyncio.wait(to_do)
    res, _ = loop.run_until_complete(wait_coro)
    loop.close()
    return len(res)


if __name__ == '__main__':
    download_many()
```
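On newer Python versions, passing bare coroutines to asyncio.wait is deprecated (and eventually rejected), and asyncio.run is the preferred entry point. A minimal sketch of the same download written in that style, assuming Python 3.7+ and the same get_photos helpers:

```python
import uuid
import asyncio

import aiohttp

from get_photos import out_wrapper, user_conf, save_flag


async def download_one(session, url):
    async with session.get(url) as resp:
        save_flag(await resp.read(), str(uuid.uuid4()))


async def download_all(urls):
    # one shared session, one task per URL
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(*(download_one(session, url) for url in urls))


@out_wrapper
def download_many():
    urls = user_conf()
    asyncio.run(download_all(urls))
    return len(urls)


if __name__ == '__main__':
    download_many()
```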
```python
from concurrent import futures

from get_photos import out_wrapper, download_one, user_conf


@out_wrapper
def download_many():
    zzr = user_conf()
    with futures.ThreadPoolExecutor(len(zzr)) as executor:
        res = executor.map(download_one, zzr)
    return len(list(res))


if __name__ == '__main__':
    download_many()
```
```python
from concurrent import futures

from get_photos import out_wrapper, download_one, user_conf


@out_wrapper
def download_many():
    zzr = user_conf()
    with futures.ThreadPoolExecutor(len(zzr)) as executor:
        to_do = [executor.submit(download_one, item) for item in zzr]
        ret = [future.result() for future in futures.as_completed(to_do)]
    return ret


if __name__ == '__main__':
    download_many()
```
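One practical advantage of submit plus as_completed over map is that each future can be inspected individually, so a single failed download does not abort the whole run. A minimal sketch, assuming the same get_photos helpers:

```python
from concurrent import futures

from get_photos import out_wrapper, download_one, user_conf


@out_wrapper
def download_many():
    zzr = user_conf()
    ok = 0
    with futures.ThreadPoolExecutor(len(zzr)) as executor:
        to_do = {executor.submit(download_one, item): item for item in zzr}
        for future in futures.as_completed(to_do):
            try:
                future.result()  # re-raises any exception from download_one
                ok += 1
            except Exception as exc:
                print('{} failed: {}'.format(to_do[future], exc))
    return ok


if __name__ == '__main__':
    download_many()
```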