# Crawler examples: synchronous call, then threaded / pooled / gevent variants
# -- Synchronous crawler: each URL is downloaded and parsed one after another. --

import requests


def parse_page(res):
    """Consume a downloaded page body; here we just report its length."""
    print('PARSE %s' % (len(res)))


def get_page(url):
    """Download *url* and return the response body, or None on a non-200 status."""
    print('GET %s' % url)
    # timeout added: without it a stalled server blocks the whole script forever
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        return response.text
    return None  # explicit: callers must handle a failed download


if __name__ == '__main__':
    urls = [
        'https://www.baidu.com',
        'https://www.taobao.com',
        'https://www.openstack.org',
    ]
    for url in urls:
        res = get_page(url)
        # bug fix: the original passed None to parse_page on a non-200
        # response, which raised TypeError inside len()
        if res is not None:
            parse_page(res)
# -- Threaded crawler: one worker thread per URL; parsing runs as a callback --
# -- inside the thread that downloaded the page.                             --

import requests
from threading import Thread, current_thread


def parse_page(res):
    """Report the size of a downloaded page (runs in the worker thread)."""
    # current_thread().name replaces getName(), deprecated since Python 3.10
    print('%s PARSE %s' % (current_thread().name, len(res)))


def get_page(url, callback=parse_page):
    """Download *url* and hand the body to *callback* on HTTP 200."""
    print('%s GET %s' % (current_thread().name, url))
    # timeout added so a stalled server cannot hang a worker forever
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        callback(response.text)


if __name__ == '__main__':
    urls = [
        'https://www.baidu.com',
        'https://www.taobao.com',
        'https://www.openstack.org',
    ]
    threads = []
    for url in urls:
        t = Thread(target=get_page, args=(url,))
        t.start()
        threads.append(t)
    # join added: wait for every download to finish before the script exits
    for t in threads:
        t.join()
# -- Thread-pool crawler: futures with a done-callback for parsing. --

import requests
from threading import current_thread
# ProcessPoolExecutor kept for comparison with the thread-pool variant; unused here
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor


def parse_page(res):
    """Done-callback: *res* is the Future returned by pool.submit.

    Unwraps the future and reports the page size. Runs in the worker
    thread that completed the download.
    """
    res = res.result()  # re-raises any exception raised inside get_page
    # bug fix: a failed download returns None; len(None) raised TypeError
    if res is None:
        return
    # current_thread().name replaces getName(), deprecated since Python 3.10
    print('%s PARSE %s' % (current_thread().name, len(res)))


def get_page(url):
    """Download *url* and return the body, or None on a non-200 status."""
    print('%s GET %s' % (current_thread().name, url))
    # timeout added so a stalled server cannot hang a pool worker forever
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        return response.text
    return None


if __name__ == '__main__':
    urls = [
        'https://www.baidu.com',
        'https://www.taobao.com',
        'https://www.openstack.org',
    ]
    # 50 workers is far more than 3 URLs need; the pool caps actual usage
    pool = ThreadPoolExecutor(50)
    for url in urls:
        pool.submit(get_page, url).add_done_callback(parse_page)
    pool.shutdown(wait=True)  # block until every future has completed
# -- Coroutine crawler: gevent greenlets with monkey-patched blocking I/O. --

# patch_all() must run BEFORE importing requests so that requests' socket
# calls cooperatively yield to other greenlets instead of blocking.
from gevent import joinall, spawn, monkey
monkey.patch_all()

import requests
from threading import current_thread


def parse_page(res):
    """Report the size of a downloaded page."""
    # current_thread().name replaces getName(), deprecated since Python 3.10
    print('%s PARSE %s' % (current_thread().name, len(res)))


def get_page(url, callback=parse_page):
    """Download *url* and hand the body to *callback* on HTTP 200."""
    print('%s GET %s' % (current_thread().name, url))
    # timeout added: with gevent a hung socket would otherwise park this
    # greenlet forever
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        callback(response.text)


if __name__ == '__main__':
    urls = [
        'https://www.baidu.com',
        'https://www.taobao.com',
        'https://www.openstack.org',
    ]
    tasks = [spawn(get_page, url) for url in urls]
    joinall(tasks)  # wait for every greenlet to finish