# 用多进程来获取多个网站的源码 — fetch the source of multiple websites with multiprocessing
from multiprocessing import Pool

import requests
def get_url(url, timeout=10):
    """Fetch *url* and return a summary dict for the callback.

    Parameters
    ----------
    url : str
        Address to fetch.
    timeout : float, optional
        Seconds to wait for the server (connect + read).  The original
        call had no timeout, so a stalled server would block the worker
        process forever; 10s keeps the old behaviour for healthy sites.

    Returns
    -------
    dict
        Keys ``url`` (the input), ``status_code`` (HTTP status) and
        ``content`` (decoded response body) — the shape ``parser`` expects.
    """
    ret = requests.get(url, timeout=timeout)
    return {'url': url,
            'status_code': ret.status_code,
            'content': ret.text}
9
def parser(dic):
    """Callback for ``get_url``: log the result and save the page body.

    Parameters
    ----------
    dic : dict
        Mapping with ``url``, ``status_code`` and ``content`` keys, as
        produced by ``get_url``.

    Side effects: prints a one-line summary and writes ``content``
    (UTF-8) to a file named after the URL's host, e.g. ``www.baidu.com``.
    """
    from urllib.parse import urlsplit

    print(dic['url'], dic['status_code'], len(dic['content']))
    # The original sliced off the first 7 characters (the length of
    # 'http://'), which yields a broken name for https:// URLs; derive
    # the host properly, falling back to the old slice for odd inputs.
    filename = urlsplit(dic['url']).netloc or dic['url'][7:]
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(dic['content'])
if __name__ == '__main__':
    # Sites whose front pages are downloaded and saved to disk.
    url_l = [
        'http://www.baidu.com',
        'http://www.sogou.com',
        'http://www.hao123.com',
        'http://www.yangxiaoer.cc',
        'http://www.python.org',
    ]
    # Four worker processes fetch pages in parallel; each completed
    # download is handed to parser() in the main process.
    p = Pool(4)
    for url in url_l:
        # error_callback surfaces worker exceptions (DNS failure,
        # timeout, ...); without it apply_async drops them silently
        # because the success callback simply never fires.
        p.apply_async(get_url, args=(url,), callback=parser,
                      error_callback=print)
    p.close()  # no further tasks will be submitted
    p.join()   # block until every fetch (and callback) has finished