zoukankan      html  css  js  c++  java
  • 【11.3】select+回调+事件循环获取html

     1 #!/usr/bin/env python
     2 # -*- coding:utf-8 -*-
     3 
     4 # 通过非阻塞io实现http请求
     5 import socket
     6 from urllib.parse import urlparse
     7 from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE
     8 
     9 selector = DefaultSelector()
    10 
    11 urls = ['http://www.baidu.com']
    12 stop = False
    13 
    14 
    15 # 使用select完成http请求
    16 class Fetcher:
    17     def connected(self, key):
    18         # 注销事件
    19         selector.unregister(key.fd)
    20         self.client.send("GET {} HTTP/1.1
    Host:{}
    Connection:close
    
    ".format(self.path, self.host).encode('utf-8'))
    21         #
    22         selector.register(self.client.fileno(), EVENT_READ, self.readable)
    23 
    24     def readable(self, key):
    25         d = self.client.recv(1024)
    26         if d:
    27             self.data += d
    28         else:
    29             # 注销
    30             selector.unregister(key.fd)
    31             self.data = self.data.decode('utf-8')
    32             html_data = self.data.split('
    
    ')[1]
    33             print(self.data)
    34             print(html_data)
    35             self.client.close()
    36             urls.remove(self.spider_url)
    37             if not urls:
    38                 global stop
    39                 stop = True
    40 
    41     def get_url(self, url):
    42         self.spider_url = url
    43         # 通过socket请求html
    44         url = urlparse(url)
    45         self.host = url.netloc
    46         self.path = url.path
    47         self.data = b""
    48         if self.path == '':
    49             self.path = '/'
    50 
    51         # 建立连接
    52         self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    53         self.client.setblocking(False)
    54 
    55         try:
    56             self.client.connect((self.host, 80))
    57         except BlockingIOError as e:
    58             pass
    59 
    60         # 注册事件
    61         selector.register(self.client.fileno(), EVENT_WRITE, self.connected)
    62         """
    63         register(fileobj, events, data=None)
    64         fileobj:文件描述符
    65         events:监听事件
    66         data:回调函数
    67         
    68         """
    69 
    70 
    71 def loop():
    72     # 事件循环,不停的请求socket的状态并调用对应的回调函数
    73     # 1.select本身是不支持register模式。
    74     # 2.socket状态变化以后的回调是由程序员完成的。
    75     while not stop:
    76         # windows下会报错,但是linux下不会报错
    77         ready = selector.select()
    78         for key, mask in ready:
    79             call_back = key.data
    80             call_back(key)
    81 
    82 
    83 if __name__ == '__main__':
    84     fetcher = Fetcher()
    85     fetcher.get_url('http://www.baidu.com')
    86     loop()

     使用回调函数的缺点:

      1.可读性差

      2.共享状态管理困难

      3.异常处理困难

  • 相关阅读:
    LeetCode 40. 组合总和 II(Combination Sum II)
    LeetCode 129. 求根到叶子节点数字之和(Sum Root to Leaf Numbers)
    LeetCode 60. 第k个排列(Permutation Sequence)
    LeetCode 47. 全排列 II(Permutations II)
    LeetCode 46. 全排列(Permutations)
    LeetCode 93. 复原IP地址(Restore IP Addresses)
    LeetCode 98. 验证二叉搜索树(Validate Binary Search Tree)
    LeetCode 59. 螺旋矩阵 II(Spiral Matrix II)
    一重指针和二重指针
    指针的意义
  • 原文地址:https://www.cnblogs.com/zydeboke/p/11328584.html
Copyright © 2011-2022 走看看