zoukankan      html  css  js  c++  java
  • 获取动态IP

    import base64
    import re

    import lxml.html
    import requests
    
    
    class Exam_spider:
        """
        Two-step scraper for the exam page at ``base_url``.

        The first request obtains a ``session`` cookie; two further cookie
        values (``c1`` and ``c2``) are derived from it by Base64 encoding, and
        the second request (``base_url + '3'``) is sent with them.  The returned
        HTML is stripped of markup that is declared hidden, and the remaining
        text fragments are glued back together into dotted addresses.
        """

        # CSS rule of the form ``.NAME{display:none}``; captures NAME.
        _HIDDEN_CLASS_RE = re.compile(r'\.([A-Z]+)\{display:none\}')
        # A ``<div ...`` tag and everything after it on the same line.
        _DIV_RE = re.compile(r'<div\s.*')
        # A span hidden through an inline style attribute.
        _HIDDEN_STYLE_SPAN_RE = re.compile(r'<span\sstyle="display:none">.*?</span>')

        def __init__(self, timeout=10):
            """
            :param timeout: seconds to wait for each HTTP response before
                requests gives up (without one a stalled server blocks forever)
            """
            self.base_url = 'http://datamining.comratings.com/exam'
            self.timeout = timeout
            # One session for both requests so cookies set by the first
            # response are kept in its cookie jar.
            self.s = requests.session()

        def down_first(self):
            """
            Perform the first request.

            :return: value of the ``session`` cookie set by the response, or
                ``None`` when the response carries no such cookie
            """
            res = self.s.get(self.base_url, timeout=self.timeout)
            return res.cookies.get_dict().get('session')

        def down_second(self, cookie):
            """
            Perform the second request against ``base_url + '3'``.

            :param cookie: cookie mapping to send, as built by ``make_cookie``
            :return: raw response body (bytes)
            """
            res = self.s.get(self.base_url + '3', cookies=cookie,
                             timeout=self.timeout)
            return res.content

        def f1(self, a):
            """
            Base64-encode ``a``, using only the low 8 bits of each character.

            According to the original author's notes this reproduces the value
            of the cookies that the page's JavaScript sets.

            :param a: text to encode (the session id, or a slice of it)
            :return: Base64 text with ``=`` padding; ``""`` for empty input
            """
            # Reduce every code point to its low byte first, then let the
            # standard library do the actual encoding.
            low_bytes = bytes(ord(ch) & 0xff for ch in a)
            return base64.b64encode(low_bytes).decode('ascii')

        def make_cookie(self, sessionid):
            """
            Build the cookie mapping for the second request.

            :param sessionid: ``session`` value obtained from the first request
            :return: dict with the single key ``'Cookie'`` whose value is
                ``"session=<id>; c1=<b64 of id[1:4]>; c2=<b64 of id>"``
            """
            # NOTE(review): this dict is handed to requests as ``cookies=``, so
            # it is sent as one cookie literally named "Cookie" - confirm that
            # this is what the server expects before restructuring it.
            parts = [
                "session=" + sessionid + ';',
                "c1=" + self.f1(sessionid[1:4]) + ';',
                "c2=" + self.f1(sessionid),
            ]
            return {'Cookie': " ".join(parts)}

        def save_result(self, result):
            """
            Write the response body to ``example_spider_result.html``.

            :param result: raw bytes returned by the second request
            :return: None
            """
            with open('example_spider_result.html', 'wb') as fp:
                fp.write(result)

        @classmethod
        def _strip_hidden_markup(cls, page):
            """Return ``page`` without ``<div ...`` lines and without hidden spans."""
            # Collect the hidden class names before anything is removed.
            hidden_classes = cls._HIDDEN_CLASS_RE.findall(page)
            page = cls._DIV_RE.sub("", page)
            page = cls._HIDDEN_STYLE_SPAN_RE.sub("", page)
            for name in hidden_classes:
                # NOTE(review): ``.*`` is greedy, so this assumes at most one
                # span per line - confirm against the real page layout.
                span_re = re.compile(
                    r'<span\sclass="' + re.escape(name) + r'">.*</span>')
                page = span_re.sub("", page)
            return page

        @staticmethod
        def _group_ips(text_nodes):
            """Concatenate text fragments into strings that each hold three dots."""
            addresses = []
            current = ""
            for fragment in text_nodes:
                # Three dots and a non-dot last character means the fourth
                # part has started, so the next fragment opens a new address.
                if current.count('.') == 3 and not current.endswith('.'):
                    addresses.append(current)
                    current = ""
                current += fragment
            if current:
                addresses.append(current)
            return addresses

        def analysis_content(self, result):
            """
            Parse the response body, print the addresses found and their count.

            :param result: raw bytes returned by the second request (UTF-8)
            :return: list of the reconstructed address strings
            """
            page = result.decode('utf-8')
            cleaned = self._strip_hidden_markup(page)

            tree = lxml.html.fromstring(cleaned.replace("\n", ""))
            text_nodes = tree.xpath('//body/descendant-or-self::text()')
            # The first text node is skipped, as in the original code;
            # presumably it is a heading or label - verify against the page.
            ips = self._group_ips(text_nodes[1:])
            print(ips)
            print(len(ips))
            return ips

        def run(self):
            """
            Run the whole flow: fetch, build cookies, fetch again, save, parse.

            :return: None
            :raises RuntimeError: when the first response sets no ``session`` cookie
            """
            sessionid = self.down_first()
            if sessionid is None:
                raise RuntimeError(
                    "first response did not set a 'session' cookie")
            cookie = self.make_cookie(sessionid)
            result = self.down_second(cookie)
            # Save before parsing so the raw page is on disk even when
            # parsing fails.
            self.save_result(result)
            self.analysis_content(result)
    
    
    if __name__ == '__main__':
        # Script entry point: build the spider and run the full
        # fetch / parse / save sequence once.
        spider = Exam_spider()
        spider.run()
  • 相关阅读:
    mysql日期计算转换
    Mysql的DATE_FORMAT()日期格式转换
    JDBC连接池BoneCP
    JSP之三大指令
    JSP的三大指令 七大动作 九大对象
    JSP的语法
    Oracle序列操作
    Oracle约束-------外键约束
    Oracle约束-------檢查約束
    Oracle约束-------主键约束
  • 原文地址:https://www.cnblogs.com/liangliangzz/p/10172329.html
Copyright © 2011-2022 走看看