1.Tornado的基本使用
2.Tornado源码剖析
scrapy异步IO模块:理解为socket客户端
tornado异步非阻塞:理解为socket服务端
客户端socket异步:(scrapy)
"""Single-threaded concurrent HTTP client built on non-blocking sockets.

Goal: issue several HTTP requests concurrently from one thread using
1. sockets, 2. the HTTP protocol and 3. I/O multiplexing (``select``).

Walk-through: https://www.cnblogs.com/staff/p/13139545.html

Life cycle of one request:
1. open the connection (would block on an ordinary socket)
2. send the request: request line, ``k: v`` header lines, blank line, body
3. receive the response (would block on an ordinary socket)
4. close the connection
"""
import socket
import select


class HttpRequest:
    """Pair a socket with the request description it is serving.

    ``select.select`` accepts any object that exposes ``fileno()``, so
    instances can be handed to it directly in place of the raw socket.
    """

    def __init__(self, sock, item):
        self.sock = sock
        # Dict with the keys "host", "url" and "callback".
        self.item = item
        # Response chunks received so far; joined once the peer closes.
        self.chunks = []

    def fileno(self):
        """Return the file descriptor of the wrapped socket."""
        return self.sock.fileno()


class AsyncHttp:
    """Tiny event loop that drives many HTTP GET requests at once."""

    def __init__(self):
        # Requests whose response has not been fully received yet.
        self.list_connections = []
        # Requests whose TCP connection is still being established.
        self.list_sockets = []

    def start(self, item):
        """Begin a non-blocking connect to ``item['host']`` on port 80.

        :param item: dict with "host", "url" and "callback" entries; the
            callback is later invoked with the raw response bytes.
        :raises OSError: if the connection attempt fails immediately
            (for example the host name cannot be resolved).
        """
        sock = socket.socket()
        # Non-blocking: connect() returns at once instead of waiting for
        # the TCP handshake to complete.
        sock.setblocking(False)
        try:
            # NOTE: resolving the host name is still a blocking step.
            sock.connect((item['host'], 80,))
        except BlockingIOError:
            # Expected: the handshake continues in the background and
            # select() reports the socket as writable once it is done.
            pass
        except OSError:
            sock.close()
            raise
        request = HttpRequest(sock, item)
        self.list_sockets.append(request)
        self.list_connections.append(request)

    def run(self):
        """Drive every started request to completion.

        Each pass asks ``select`` which sockets are writable (connection
        attempt finished, the request can be sent) and which are readable
        (response data or end-of-stream is available). The loop ends when
        no request is outstanding.
        """
        while self.list_connections:
            readable, writable, _ = select.select(
                self.list_connections, self.list_sockets, [], 0.05)

            for request in writable:
                # Writable only means the connect attempt has finished;
                # SO_ERROR tells whether it actually succeeded.
                self.list_sockets.remove(request)
                error = request.sock.getsockopt(
                    socket.SOL_SOCKET, socket.SO_ERROR)
                if error:
                    self._finish(request)
                    continue
                host = request.item['host']
                url = request.item['url']
                # Header lines end with CRLF and a blank line terminates
                # the header section.
                content = 'GET %s HTTP/1.0\r\nHost:%s\r\n\r\n' % (url, host)
                try:
                    # The request is small enough to fit into the socket
                    # send buffer in one go.
                    request.sock.sendall(content.encode("utf-8"))
                except OSError:
                    self._finish(request)

            for request in readable:
                if request not in self.list_connections:
                    # Already finished earlier in this same pass.
                    continue
                try:
                    chunk = request.sock.recv(8096)
                except BlockingIOError:
                    # Spurious wake-up; try again on the next pass.
                    continue
                except OSError:
                    # Connection reset: treat it as end of stream.
                    chunk = b''
                if chunk:
                    request.chunks.append(chunk)
                    continue
                # Empty read: an HTTP/1.0 server closes the connection
                # after the response, so everything has arrived.
                self._finish(request)

    def _finish(self, request):
        """Close the request's socket, unregister it and fire its callback.

        The callback receives everything received so far, which is
        ``b''`` when the connection failed before any data arrived.
        """
        request.sock.close()
        if request in self.list_sockets:
            self.list_sockets.remove(request)
        self.list_connections.remove(request)
        request.item['callback'](b''.join(request.chunks))


def callback(data):
    """Print the raw response bytes of a finished request."""
    print(data)


# Usage
ep_io = AsyncHttp()
list_url = [
    {"host": "www.baidu.com", "url": "/", "callback": callback},
    {"host": "www.bing.com", "url": "/", "callback": callback},
    {"host": "www.cnblogs.com", "url": "/staff/p/13139545.html", "callback": callback},
]
for item in list_url:
    print(item)
    ep_io.start(item)
ep_io.run()
服务端socket异步:(tornado)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Snow: a miniature asynchronous, non-blocking web framework.

How the server-side asynchrony works: a view function may ``yield`` a
Future object, which carries a ready flag and an optional callback. The
server keeps a dict whose keys are the client sockets of pending
asynchronous requests and whose values are the yielded Futures. After
every ``select`` pass (5 ms timeout) the dict is scanned; for each Future
that has become ready the callback is run and the connection is closed.
"""
import logging
import re
import socket
import select
import time

logger = logging.getLogger(__name__)


class HttpResponse(object):
    """Container for the data sent back to the client."""

    def __init__(self, content=''):
        self.content = content
        self.headers = {}
        self.cookies = {}

    def response(self):
        """Return the content encoded as UTF-8 bytes."""
        return bytes(self.content, encoding='utf-8')


class HttpNotFound(HttpResponse):
    """Response used when no route matches the requested URL."""

    def __init__(self):
        super(HttpNotFound, self).__init__('404 Not Found')


class HttpRequest(object):
    """Read and parse one HTTP request from a client socket."""

    def __init__(self, conn):
        self.conn = conn

        self.header_bytes = bytes()
        self.header_dict = {}
        self.body_bytes = bytes()

        self.method = ""
        self.url = ""
        self.protocol = ""

        self.initialize()
        self.initialize_headers()

    def initialize(self):
        """Drain the socket and split the data into header and body bytes.

        Reads until the non-blocking socket has nothing more to offer
        (``recv`` raises) or the peer has closed (``recv`` returns
        ``b''``). Only the bytes available right now are consumed; a
        request that arrives in several bursts is seen incompletely.
        """
        chunks = []
        while True:
            try:
                received = self.conn.recv(8096)
            except OSError:
                # BlockingIOError (no more data yet) or a reset peer.
                received = None
            if not received:
                break
            chunks.append(received)
        # Splitting the joined data also handles a header/body separator
        # that straddles two recv() chunks.
        head, _, body = b''.join(chunks).partition(b'\r\n\r\n')
        self.header_bytes += head
        self.body_bytes += body

    @property
    def header_str(self):
        """Header section decoded as text (undecodable bytes replaced)."""
        return str(self.header_bytes, encoding='utf-8', errors='replace')

    def initialize_headers(self):
        """Parse the request line and the header lines.

        Fills ``method``, ``url`` and ``protocol`` when the request line
        has exactly three space-separated parts, and stores every
        ``name: value`` line in ``header_dict`` with surrounding
        whitespace stripped.
        """
        lines = self.header_str.split('\r\n')
        first_line = lines[0].split(' ')
        if len(first_line) == 3:
            self.method, self.url, self.protocol = first_line
        for line in lines[1:]:
            # Split on the first colon only, so values that contain
            # colons themselves (e.g. "Host: localhost:9999") survive.
            key, sep, value = line.partition(':')
            if sep:
                self.header_dict[key.strip()] = value.strip()


class Future(object):
    """Hold the callback and the ready flag of an asynchronous request."""

    def __init__(self, callback):
        self.callback = callback
        self._ready = False
        self.value = None

    def set_result(self, value=None):
        """Store ``value`` and mark the future as ready."""
        self.value = value
        self._ready = True

    @property
    def ready(self):
        """True once ``set_result`` has been called."""
        return self._ready


class TimeoutFuture(Future):
    """Future without a callback that becomes ready after a timeout."""

    def __init__(self, timeout):
        super(TimeoutFuture, self).__init__(callback=None)
        # Timeout in seconds, measured from construction.
        self.timeout = timeout
        self.start_time = time.time()

    @property
    def ready(self):
        """True once more than ``timeout`` seconds have elapsed."""
        current_time = time.time()
        if current_time > self.start_time + self.timeout:
            self._ready = True
        return self._ready


class Snow(object):
    """Miniature web framework: routing plus a select-based event loop."""

    def __init__(self, routes):
        # Sequence of (regex pattern, view function) pairs.
        self.routes = routes
        # Every socket watched by select(): the listener and all clients.
        self.inputs = set()
        # The most recently parsed request.
        self.request = None
        # client socket -> Future yielded by its view function.
        self.async_request_handler = {}
        # client socket -> request that produced the pending Future, so
        # the callback gets its own request rather than the latest one.
        self._pending_requests = {}

    def run(self, host='localhost', port=9999):
        """Run the event loop.

        :param host: interface to bind to
        :param port: TCP port to listen on
        :return: None; the loop only ends on an unexpected error, which
            is logged, or on an interrupt.
        """
        # AF_INET: IPv4; SOCK_STREAM: TCP.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind((host, port,))
            # Non-blocking: accept()/recv() raise instead of waiting.
            sock.setblocking(False)
            sock.listen(128)
            self.inputs.add(sock)
            try:
                while True:
                    # Wait at most 5 ms for a new connection or for data
                    # from a connected client.
                    readable_list, _, error_list = select.select(
                        self.inputs, [], self.inputs, 0.005)
                    for conn in readable_list:
                        if conn is sock:
                            # A client is connecting to the listener.
                            self._accept(sock)
                        else:
                            # A connected client has sent data.
                            self._handle(conn)
                    for conn in error_list:
                        if conn is not sock:
                            self._close(conn)
                    self.polling_callback()
            except Exception:
                # Keep the original "run() returns instead of raising"
                # behaviour, but no longer silently.
                logger.exception("event loop aborted")
        finally:
            self.inputs.discard(sock)
            sock.close()

    def _accept(self, listener):
        """Accept one pending client and start watching it."""
        try:
            client, _address = listener.accept()
        except OSError:
            # The client went away between select() and accept().
            return
        client.setblocking(False)
        self.inputs.add(client)

    def _handle(self, conn):
        """Serve one readable client connection.

        A failure while serving is logged and only drops this
        connection; the server keeps running.
        """
        try:
            result = self.process(conn)
            if isinstance(result, HttpResponse):
                conn.sendall(result.response())
            else:
                # The view is a generator: next() runs it up to its
                # ``yield`` and hands back the Future it yielded.
                self.async_request_handler[conn] = next(result)
                self._pending_requests[conn] = self.request
                return
        except Exception:
            logger.exception("error while handling a request")
        self._close(conn)

    def _close(self, conn):
        """Stop tracking ``conn`` everywhere and close it (idempotent)."""
        self.inputs.discard(conn)
        self.async_request_handler.pop(conn, None)
        self._pending_requests.pop(conn, None)
        conn.close()

    def polling_callback(self):
        """Finish every pending asynchronous request whose Future is ready.

        If the Future has a callback it is called with the request and
        the Future, and the response it returns is sent to the client.
        The connection is closed in either case.

        :return: None
        """
        for conn, yielded in list(self.async_request_handler.items()):
            if not yielded.ready:
                continue
            try:
                if yielded.callback:
                    request = self._pending_requests.get(conn, self.request)
                    ret = yielded.callback(request, yielded)
                    conn.sendall(ret.response())
            except Exception:
                logger.exception("error while finishing an async request")
            finally:
                self._close(conn)

    def process(self, conn):
        """Parse the request on ``conn`` and dispatch it to a view.

        :param conn: client socket with request data available
        :return: whatever the first view whose pattern matches the URL
            returns, or an ``HttpNotFound`` when no route matches
        """
        self.request = HttpRequest(conn)
        func = None
        for route in self.routes:
            if re.match(route[0], self.request.url):
                func = route[1]
                break
        if not func:
            return HttpNotFound()
        else:
            return func(self.request)
使用:
from snow import Snow
from snow import HttpResponse
from snow import TimeoutFuture

request_list = []


# Named ``async_view`` because ``async`` is a reserved keyword since
# Python 3.7 and can no longer be used as a function name.
def async_view(request):
    """Asynchronous view: yield a future that becomes ready after 5 seconds."""
    obj = TimeoutFuture(5)
    yield obj


def home(request):
    """Synchronous view: answer immediately with the text 'home'."""
    return HttpResponse('home')


# (URL pattern, view function) pairs.
routes = [
    (r'/home/', home),
    (r'/async/', async_view),
]

app = Snow(routes)
app.run(port=8012)