#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Issue an HTTP GET over a non-blocking socket driven by selector callbacks.

Every socket is registered with the module-level ``selector`` together with a
callback. ``loop`` waits for readiness events and calls the callback stored
in ``key.data`` with the selector key.
"""
import socket
from urllib.parse import urlparse
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE

# Selector shared by every Fetcher instance and by loop().
selector = DefaultSelector()

# URLs that are still being fetched; loop() stops once this list is empty.
urls = ['http://www.baidu.com']
stop = False


class Fetcher:
    """Fetch a single URL over plain HTTP using selector callbacks.

    ``get_url`` sets ``spider_url``, ``host``, ``path``, ``data`` and
    ``client``. When the response is complete, ``readable`` replaces ``data``
    with the decoded response text and stores the part after the header
    section in ``body``.
    """

    def connected(self, key):
        """Send the GET request once the socket is reported writable.

        Args:
            key: The selector key delivered by ``selector.select()``.
        """
        # Stop watching for writability; only reads matter from here on.
        selector.unregister(key.fd)
        # HTTP/1.1 terminates every header line with CRLF and ends the header
        # section with an empty line; without them the request is malformed.
        request = (
            "GET {} HTTP/1.1\r\n"
            "Host:{}\r\n"
            "Connection:close\r\n"
            "\r\n"
        ).format(self.path, self.host)
        self.client.send(request.encode('utf-8'))
        selector.register(self.client.fileno(), EVENT_READ, self.readable)

    def readable(self, key):
        """Accumulate response bytes; finish up when the peer closes.

        Args:
            key: The selector key delivered by ``selector.select()``.
        """
        global stop

        try:
            chunk = self.client.recv(1024)
        except (BlockingIOError, InterruptedError):
            # Nothing to read after all; wait for the next readiness event.
            return

        if chunk:
            self.data += chunk
            return

        # An empty read means the connection was closed ("Connection:close"
        # was requested), so the response is complete.
        selector.unregister(key.fd)
        self.client.close()

        # Pages are not guaranteed to be valid UTF-8; replace undecodable
        # bytes instead of crashing inside the event loop.
        self.data = self.data.decode('utf-8', errors='replace')
        # The body is everything after the first blank line. partition()
        # keeps blank lines inside the body and yields '' when the separator
        # is missing instead of raising IndexError.
        _, _, self.body = self.data.partition('\r\n\r\n')
        print(self.data)
        print(self.body)

        # Tolerate URLs that were never added to the tracking list.
        if self.spider_url in urls:
            urls.remove(self.spider_url)
        if not urls:
            stop = True

    def get_url(self, url):
        """Start a non-blocking connection to ``url`` and register for writes.

        Args:
            url: Absolute ``http://`` URL. An explicit port and a query
                string are honoured; the port defaults to 80.

        Raises:
            ValueError: If ``url`` has no host part or an invalid port.
            OSError: If the connection attempt fails immediately.
        """
        self.spider_url = url
        parsed = urlparse(url)
        if not parsed.hostname:
            raise ValueError("URL has no host: {!r}".format(url))
        port = parsed.port or 80

        # netloc (including any explicit port) goes into the Host header.
        self.host = parsed.netloc
        self.path = parsed.path or '/'
        if parsed.query:
            self.path += '?' + parsed.query
        self.data = b""

        # Open the connection without blocking.
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client.setblocking(False)

        try:
            self.client.connect((parsed.hostname, port))
        except BlockingIOError:
            # Expected: the connect is still in progress on a non-blocking
            # socket. Completion is signalled by the socket becoming writable.
            pass
        except OSError:
            # Real failure: do not leak the socket.
            self.client.close()
            raise

        # register(fileobj, events, data=None):
        #   fileobj - file descriptor to watch
        #   events  - events to wait for
        #   data    - opaque value; used here to carry the callback
        selector.register(self.client.fileno(), EVENT_WRITE, self.connected)


def loop():
    """Run the event loop until the module-level ``stop`` flag is set.

    select itself offers no callback mechanism: this loop polls for socket
    state changes and it is the program's job to call the matching callback,
    which is stored in ``key.data``.
    """
    while not stop:
        # Original author's note: this call raises an error on Windows but
        # not on Linux (presumably when no socket is registered - TODO
        # confirm).
        ready = selector.select()
        for key, _mask in ready:
            call_back = key.data
            call_back(key)


if __name__ == '__main__':
    fetcher = Fetcher()
    fetcher.get_url('http://www.baidu.com')
    loop()
使用回调函数的缺点:
1.可读性差
2.共享状态管理困难
3.异常处理困难