一、select实现Web框架(自定义WEB框架)
浏览器输入:http://127.0.0.1:8888/index.html

import select
import socket


class Flask(object):
    """Tiny select()-based web server that dispatches requests by exact URL.

    ``routers`` is an iterable of ``(url, handler)`` pairs. A handler is
    called as ``handler(header_dict, body)`` and must return a ``str``,
    which is encoded as UTF-8 and written back to the client.
    """

    def __init__(self, routers):
        self.routers = routers

    def process_data(self, client):
        """Read the data currently available on *client* and parse it as HTTP.

        Returns a ``(header_dict, body)`` tuple. ``header_dict`` holds the
        request line under the keys ``'method'``, ``'url'`` and
        ``'protocol'`` plus one entry per header line (name and value
        stripped of surrounding whitespace). The historical misspelled key
        ``'mothod'`` is still populated for existing handlers. When the
        request is empty or its first line is malformed, the request-line
        keys are simply absent. ``body`` is everything after the first blank
        line, as ``str``.
        """
        chunks = []
        while True:
            try:
                trunk = client.recv(1024)
            except (BlockingIOError, ConnectionError):
                # BlockingIOError: the non-blocking socket has no more data
                # right now. ConnectionError: the peer reset the connection.
                # Either way there is nothing further to read.
                break
            if not trunk:
                # Empty read means the peer closed its side.
                break
            chunks.append(trunk)

        data_str = b"".join(chunks).decode('utf8', errors='replace')
        # The header section and the body are separated by a blank line
        # (CRLF CRLF); header lines are separated by a single CRLF.
        header, _, body = data_str.partition('\r\n\r\n')
        lines = header.split('\r\n')

        header_dict = {}
        # The request line is parsed by position, not by "has no colon":
        # a URL may legitimately contain ':'.
        request_line = lines[0].split()
        if len(request_line) == 3:
            method, url, protocol = request_line
            header_dict['method'] = method
            header_dict['mothod'] = method  # legacy key kept for old handlers
            header_dict['url'] = url
            header_dict['protocol'] = protocol

        for line in lines[1:]:
            name, sep, value = line.partition(':')
            if sep:
                header_dict[name.strip()] = value.strip()

        return header_dict, body

    def _find_handler(self, url):
        """Return the handler registered for *url*, or None when unrouted."""
        for item in self.routers:
            if item[0] == url:
                return item[1]
        return None

    def _respond(self, client):
        """Parse one request from *client* and write the handler's reply."""
        header_dict, body = self.process_data(client)
        request_url = header_dict.get('url')
        if request_url is None:
            # Nothing usable arrived (client disconnected or sent garbage);
            # the caller closes the socket.
            return

        handler = self._find_handler(request_url)
        if not handler:
            payload = b"404"
        else:
            payload = handler(header_dict, body).encode('utf8')

        # NOTE: the reply is written as raw bytes, without an HTTP status
        # line or headers, exactly as before.
        try:
            client.sendall(payload)
        except OSError:
            # The peer went away before the reply was written; closing the
            # socket (done by the caller) is all that is left to do.
            pass

    def run(self, host='127.0.0.1', port=8888):
        """Serve forever on ``(host, port)``, one request per connection.

        Every socket still open, including the listening one, is closed
        when the loop is left through an exception such as
        KeyboardInterrupt.
        """
        sock = socket.socket()
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.setblocking(False)
        sock.bind((host, port))
        sock.listen(5)

        inputs = [sock]
        try:
            while True:
                readable, _, _ = select.select(inputs, [], [], 0.5)
                for client in readable:
                    if client is sock:
                        # New incoming connection.
                        try:
                            conn, _addr = sock.accept()
                        except (BlockingIOError, ConnectionError):
                            # The client gave up between select() and accept().
                            continue
                        conn.setblocking(False)
                        inputs.append(conn)
                    else:
                        # An accepted client sent data (or disconnected).
                        try:
                            self._respond(client)
                        finally:
                            inputs.remove(client)
                            client.close()
        finally:
            for open_sock in inputs:
                open_sock.close()
import flask


def f1(header, body):
    """Handler for /index.html; ignores the request and returns fixed text."""
    return "from f1"


def f2(header, body):
    """Handler for /login.html; ignores the request and returns fixed text."""
    return "from f2"


# Routing table: each entry maps an exact request URL to its handler.
routers = [("/index.html", f1), ("/login.html", f2)]

# Build the server from the routing table and start serving with its defaults.
obj = flask.Flask(routers)
obj.run()
二、自定义Wget(爬虫)
select 并不要求传入的必须是 socket 对象,只要对象实现了 fileno() 方法并返回文件描述符即可被监听。因此可以用 Foo 类在 socket 外面再封装一层,把回调函数、URL、主机名等信息和 socket 绑定在一起,从而增加更多的功能。

#!/usr/bin/env python
# -*-coding:utf8-*-
# __author__ = "willian"
import socket
import select


# Thin wrapper layer between select() and the raw socket.
class Foo(object):
    """Bundle a socket with the request data needed to drive it.

    select() accepts any object that exposes ``fileno()``, so instances can
    be passed to it directly while also carrying the callback, URL and host.
    """

    def __init__(self, sock, callback, url, host):
        self.sock = sock
        self.callback = callback
        self.url = url
        self.host = host

    def fileno(self):
        """Return the wrapped socket's file descriptor for select()."""
        return self.sock.fileno()


class NbIO(object):
    """Issue several HTTP GET requests concurrently over non-blocking sockets."""

    def __init__(self):
        self.fds = []          # requests still waiting for a response
        self.connections = []  # requests whose GET has not been sent yet

    def connect(self, url_list):
        """Start a non-blocking connect to port 80 for every entry.

        Each item of *url_list* is a mapping with the keys ``'host'``,
        ``'url'`` and ``'callback'``.
        """
        for item in url_list:
            conn = socket.socket()
            conn.setblocking(False)
            # 1. Start the connection attempt.
            try:
                conn.connect((item['host'], 80))
            except BlockingIOError:
                # Expected for a non-blocking connect: the handshake is still
                # in progress; select() reports the socket writable when done.
                pass
            obj = Foo(conn, item['callback'], item['url'], item['host'])
            self.fds.append(obj)
            self.connections.append(obj)

    def send(self):
        """Drive every pending request until each one is finished or failed.

        For each connection the GET request is sent once the socket becomes
        writable. When the socket becomes readable, the bytes available at
        that moment are passed to the item's callback and the socket is
        closed. A connection that fails (refused, reset, ...) is closed and
        dropped without invoking its callback, so that the remaining
        requests keep running.
        """
        while self.fds:
            # Writable means the connection attempt has completed.
            readable, writable, errored = select.select(
                self.fds, self.connections, self.fds, 0.5)

            for obj in errored:
                self._drop(obj)

            # Handle writes before reads so that a connection finished in
            # this round is never written to after it has been closed.
            for obj in writable:
                if obj not in self.connections:
                    continue  # already dropped in this round
                # 2. Connected to the remote host.  3. Send the request.
                # A POST would use: "POST %s HTTP/1.1\r\nHost: ...\r\n\r\nk1=v1&k2=v2"
                template = "GET %s HTTP/1.1\r\nHost: %s\r\n\r\n" % (obj.url, obj.host,)
                try:
                    obj.sock.sendall(template.encode('utf-8'))
                except OSError:
                    # The connect failed or the peer hung up.
                    self._drop(obj)
                    continue
                self.connections.remove(obj)

            for obj in readable:
                if obj not in self.fds:
                    continue  # already dropped in this round
                # 4. Response data has arrived.
                try:
                    data = self._read_available(obj.sock)
                except OSError:
                    self._drop(obj)
                    continue
                obj.callback(data)  # user-supplied handling of the response
                self._drop(obj)

    @staticmethod
    def _read_available(sock):
        """Return the bytes that can be read from *sock* without blocking."""
        chunks = []
        while True:
            try:
                chunk = sock.recv(1024)
            except BlockingIOError:
                # Nothing more to read right now.
                break
            if not chunk:
                # Empty read means the peer closed the connection.
                break
            chunks.append(chunk)
        return b"".join(chunks)

    def _drop(self, obj):
        """Forget *obj* in both pending lists and close its socket."""
        for pending in (self.fds, self.connections):
            if obj in pending:
                pending.remove(obj)
        obj.sock.close()
1 import spider 2 3 4 def f1(data): 5 print("