zoukankan      html  css  js  c++  java
  • python-自定义异步非阻塞爬虫框架

    API(框架实现:MySock 与 YinBing)

    import socket
    import select
    
    
    class MySock:
        """Pair a socket with the request description it belongs to.

        Any attribute that is not found on the wrapper itself is looked up on
        the wrapped socket, so the wrapper can be handed to ``select.select``
        (which needs ``fileno()``) and used for ``send``/``recv`` directly.

        Attributes:
            sock: the wrapped socket object.
            data: the request description associated with that socket.
        """

        def __init__(self, sock, data):
            self.sock = sock
            self.data = data

        def __getattr__(self, item):
            """Forward lookups of unknown attributes to the wrapped socket."""
            # __getattr__ only runs for names missing on the wrapper.  If
            # "sock" itself is missing (instance created without __init__, as
            # copy/pickle do), reading self.sock would re-enter this method
            # until RecursionError; fail with a normal AttributeError instead.
            if item == "sock":
                raise AttributeError(item)
            return getattr(self.sock, item)
    
    
    class YinBing:
        """Minimal select()-based, non-blocking HTTP GET fetcher.

        Each request is a dict with the keys ``host``, ``port`` and ``path``
        plus an optional ``callback``.  ``add`` starts a non-blocking connect
        and ``run`` drives every queued connection from a single thread until
        all of them have finished.

        Attributes:
            r_list: connections still waiting for (more of) their response.
            w_list: connections whose request has not been sent yet.
            failed: ``(req_info, exception)`` pairs for connections that were
                dropped because of a socket error; their callback is not
                invoked.
        """

        def __init__(self):
            self.r_list = []
            self.w_list = []
            self.failed = []
            # Response chunks received so far, keyed by connection wrapper.
            self._chunks = {}

        def add(self, req_info):
            """Start connecting to the requested host and queue the request.

            Args:
                req_info: dict with ``host``, ``port``, ``path`` and an
                    optional ``callback`` that receives the response bytes.

            Raises:
                Exception: whatever ``connect`` raises other than the expected
                    ``BlockingIOError``; the socket is closed before the
                    exception propagates.
            """
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.setblocking(False)
            try:
                sock.connect((req_info.get("host"), req_info.get("port")))
            except BlockingIOError:
                # Expected: a non-blocking connect reports "in progress" this
                # way; select() marks the socket writable once it completes.
                pass
            except Exception:
                # Do not leak the descriptor when the connect fails outright.
                sock.close()
                raise
            s = MySock(sock, req_info)
            self.r_list.append(s)
            self.w_list.append(s)

        def run(self):
            """Drive all queued requests until every connection has finished.

            For each connection the request is sent exactly once, the response
            is read until the server closes the connection, the socket is
            closed, and the request's ``callback`` (if any) is called with the
            complete response bytes.  Connections that hit a socket error are
            closed and recorded in ``failed`` instead.

            A server that never answers and never closes keeps this loop
            running; no overall timeout is applied.
            """
            while self.r_list:
                readable, writable, _ = select.select(
                    self.r_list, self.w_list, [], 0.5)
                for conn in writable:
                    self._send_request(conn)
                for conn in readable:
                    # A failed send just above may already have dropped it.
                    if conn in self.r_list:
                        self._read_chunk(conn)

        def _send_request(self, conn):
            """Send the GET request for *conn* once; drop it on socket error."""
            # "Connection: close" makes the server close the socket after the
            # response, which is how _read_chunk detects the end of the body.
            request = (
                "GET %s HTTP/1.1\r\nHost:%s\r\nConnection: close\r\n\r\n"
                % (conn.data["path"], conn.data["host"])
            ).encode("utf-8")
            try:
                # NOTE: assumes this short request is accepted in one send().
                conn.send(request)
            except OSError as exc:
                self._drop(conn, exc)
                return
            # Stop watching for writability so the request is not re-sent on
            # every pass of the loop.
            self.w_list.remove(conn)

        def _read_chunk(self, conn):
            """Read one chunk from *conn*; on EOF finish and call the callback."""
            try:
                chunk = conn.recv(1024)
            except BlockingIOError:
                # Spurious readiness; try again on the next select() pass.
                return
            except OSError as exc:
                self._drop(conn, exc)
                return
            if chunk:
                self._chunks.setdefault(conn, []).append(chunk)
                return
            # Empty read: the peer closed the connection, response is complete.
            response = b"".join(self._chunks.pop(conn, []))
            self._close(conn)
            callback = conn.data.get("callback")
            if callback:
                callback(response)

        def _drop(self, conn, exc):
            """Record a failed connection and release its socket."""
            self.failed.append((conn.data, exc))
            self._close(conn)

        def _close(self, conn):
            """Forget *conn* everywhere and close its socket."""
            for pending in (self.r_list, self.w_list):
                if conn in pending:
                    pending.remove(conn)
            self._chunks.pop(conn, None)
            conn.close()
    

    调用方法

    from 自定义爬虫框架 import YinBing
    
    
    def done1(response):
        """Callback: print the response preceded by the '处理一' label."""
        label = '处理一'
        print(label, response)
    
    
    def done2(response):
        """Callback: print the response exactly as received."""
        received = response
        print(received)
    
    
    # Requests to fetch: one dict per page, with the callback that should
    # receive its response (None means the response is not handed to anyone).
    url_list = [
        dict(host='www.baidu.com', port=80, path='/', callback=done1),
        dict(host='www.cnblogs.com', port=80, path='/index.html', callback=done2),
        dict(host='www.bing.com', port=80, path='/', callback=None),
    ]

    if __name__ == '__main__':
        # Queue every request first, then let one loop service all of them.
        crawler = YinBing()
        for request in url_list:
            crawler.add(request)
        crawler.run()
  • 相关阅读:
    Hrbust-1492 盒子(二分图最大匹配)
    数据结构——二叉树的建立和遍历(递归建树&层序遍历建树)
    HDU 1710 二叉树遍历
    HDU 2891
    HDU 2895 贪心 还是 大水题
    POJ 2896 另解暴力
    POJ 2896 AC自动机 or 暴力
    HDU 1714 math
    POJ 1328 贪心
    POJ 2109 巧妙解法
  • 原文地址:https://www.cnblogs.com/LTEF/p/9820955.html
Copyright © 2011-2022 走看看