# Patch the standard library first, before anything else imports socket/ssl.
# Without these two lines the serial and the coroutine runs would in theory
# take the same time, because gevent cannot detect the blocking I/O.
from gevent import monkey
monkey.patch_all()

import time
from urllib import request

import gevent
def f(url):
    """Fetch ``url`` and report the size of the response body.

    Prints a ``GET:`` line before the request is issued and a summary line
    with the number of bytes read once the whole body has been received.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Returns:
        None. The result is only printed.
    """
    print('GET:%s' % url)
    # The context manager closes the response even if read() raises.
    with request.urlopen(url) as resp:
        data = resp.read()
    print('%d bytes received from %s' % (len(data), url))
# Crawl the pages one after another in a plain loop, i.e. serially.
urls = ['https://www.python.org/', 'https://www.yahoo.com/']
start_time = time.time()
for url in urls:
    f(url)
# This is the serial measurement, so it must not carry the "asynchronous" label.
print('The total time is {time}'.format(time=time.time() - start_time))

# Crawl the same pages with coroutines: one gevent greenlet per URL.
async_time = time.time()
gevent.joinall([gevent.spawn(f, url) for url in urls])
print('The asynchronous total time is {time}'.format(time=time.time() - async_time))
运行的结果如下:
GET:https://www.python.org/
48835 bytes received from https://www.python.org/
GET:https://www.yahoo.com/
498399 bytes received from https://www.yahoo.com/
The total time is 12.665598630905151
GET:https://www.python.org/
GET:https://www.yahoo.com/
48835 bytes received from https://www.python.org/
498546 bytes received from https://www.yahoo.com/
The asynchronous total time is 5.80000114440918