import time
from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor


class ThreadPoolSpider:
    # Shared class-level pool: at most 8 requests run concurrently.
    executor = ThreadPoolExecutor(max_workers=8)
    # Swap in a process pool instead if the work were CPU-bound:
    # executor = ProcessPoolExecutor(max_workers=8)

    def http_request(self, url, second):
        # Simulate an I/O-bound request by sleeping for `second` seconds.
        time.sleep(second)
        return url, second

    def crawl(self):
        # Submit 100 tasks; each submit() returns a Future immediately.
        all_tasks = []
        for index in range(100):
            # `index` stands in for a URL; 10 is the simulated latency in seconds.
            task = self.executor.submit(self.http_request, index, 10)
            all_tasks.append(task)

        # Print each result as soon as its task finishes.
        for future in as_completed(all_tasks):
            data = future.result()
            print(data)


if __name__ == '__main__':
    spider = ThreadPoolSpider()
    spider.crawl()
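Here http_request only simulates network latency with time.sleep. Below is a minimal sketch of the same submit / as_completed pattern with a real fetch, using urllib from the standard library (the fetch function and the URLs are illustrative placeholders, not part of the original code):

import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch(url, timeout=10):
    # Return the URL and the size of the response body in bytes.
    with urllib.request.urlopen(url, timeout=timeout) as resp:
        return url, len(resp.read())

urls = ['https://example.com', 'https://example.org']  # placeholder URLs
with ThreadPoolExecutor(max_workers=8) as executor:
    futures = [executor.submit(fetch, url) for url in urls]
    for future in as_completed(futures):
        print(future.result())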
ThreadPoolExecutor: a thread pool.
as_completed() is a generator: it blocks while no task has finished; as soon as one task completes, it yields that finished future, so the body of the for loop can run. It then blocks again, repeating until all of the tasks are done.
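The completion-order behaviour is easiest to see with tasks of different durations. A small standalone sketch (not from the original code) that submits sleeps of 3, 1 and 2 seconds and prints the results shortest-first, regardless of submission order:

import time
from concurrent.futures import ThreadPoolExecutor, as_completed

def wait_and_return(seconds):
    time.sleep(seconds)
    return seconds

with ThreadPoolExecutor(max_workers=3) as executor:
    futures = [executor.submit(wait_and_return, s) for s in (3, 1, 2)]
    for future in as_completed(futures):
        # Prints 1, then 2, then 3: each future is yielded as soon as it finishes.
        print(future.result())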