zoukankan      html  css  js  c++  java
  • 汽车之家反爬

    思路:下载页面所用的自定义字体文件,把其中字形名对应的码位转换成真实汉字,从而破解字体反爬。

    只是为练习字体反爬

    #!/usr/bin/env python  
    # encoding: utf-8  
    from requests_html import HTMLSession
    import re
    import os
    from fontTools.ttLib import TTFont
    
    class QiCheZhiJia():
        """Fetch one Autohome forum thread and undo its custom-font obfuscation.

        The page renders certain characters through a downloadable font whose
        glyphs are named ``uniXXXX``.  This class downloads (and caches) that
        font, maps each glyph, by position, onto ``self.hanzi``, and replaces
        the obfuscated characters in the scraped text with the real ones.

        NOTE(review): the positional glyph -> ``hanzi`` mapping is taken as-is
        from the original script; confirm it still matches the site's fonts.
        """

        # Captures the font URL that follows a ``'),url('`` separator in the
        # page source.  The parentheses must be escaped: unescaped, the pattern
        # does not even compile ("unbalanced parenthesis").
        _FONT_URL_RE = re.compile(r"'\),url\('(.*?)'\)")

        # Directory in which downloaded font files are cached.
        _FONT_DIR = "./fonts"

        def __init__(self):
            """Set up the target URL, the replacement table and an HTTP session."""
            self.url="https://club.autohome.com.cn/bbs/thread/bb8c36ced93ce182/74203500-1.html"
            # Real characters, in the same order as the font's glyphs
            # (excluding the first glyph, which is skipped everywhere below).
            self.hanzi=['不','了','呢','更','是','四','小','七','三','多','得','一','着','下','十','少','长','二','六','远','左','地','短','九','五','上','坏','很','右','低','高','矮','八','近','大','好','的','和']
            self.session=HTMLSession()
            # Maps an HTML entity prefix such as "&#xecb4" to its real character.
            self.f_dict={}
            # Glyph names of the loaded font, in glyph order.
            self.uniWordList=[]
            # Obfuscated characters decoded from the glyph names (first skipped).
            self.utf8WordList=[]

        @staticmethod
        def _glyph_to_char(glyph_name):
            """Return the character named by a ``uniXXXX`` glyph, or ``""``.

            An empty string is returned for names that do not follow the
            ``uni<hex>`` convention so that list positions stay aligned with
            ``self.hanzi``; callers must skip empty entries.
            """
            if glyph_name[:3].lower() != "uni":
                return ""
            try:
                return chr(int(glyph_name[3:], 16))
            except (ValueError, OverflowError):
                return ""

        def create_font(self,font_url):
            """Load the font at *font_url* and build the decoding tables.

            The font is downloaded into ``./fonts`` on first use and read from
            that cache afterwards.  Populates ``self.f_dict``,
            ``self.uniWordList`` and ``self.utf8WordList``.

            Args:
                font_url: Absolute URL of the font file.
            """
            font_file=font_url.split('/')[-1]
            font_path=os.path.join(self._FONT_DIR, font_file)

            os.makedirs(self._FONT_DIR, exist_ok=True)
            if not os.path.exists(font_path):
                # Not cached yet: download the new font file.
                print('不在字体库中, 下载:', font_file)
                new_file = self.session.get(font_url).content
                with open(font_path, 'wb') as f:
                    f.write(new_file)

            font = TTFont(font_path)
            self.uniWordList = font.getGlyphOrder()
            # The first glyph is skipped, as in the original script.
            gly_list = self.uniWordList[1:]

            # zip() stops at the shorter sequence, so a font with more glyphs
            # than known characters no longer raises IndexError.
            for gly,word in zip(gly_list,self.hanzi):
                self.f_dict.setdefault(gly.lower().replace('uni','&#x'),word)

            # Decode "uniXXXX" to the actual code point.  The previous
            # replace("uni", "u") + unicode-escape left the literal text
            # "uXXXX", so nothing was ever replaced in the page text.
            self.utf8WordList = [self._glyph_to_char(uniWord) for uniWord in gly_list]
            print(self.utf8WordList)

        def run(self):
            """Fetch the thread, load its font and print the decoded text."""
            req=self.session.get(self.url)
            source=req.text
            font_url=self.parse(source)
            self.create_font(font_url)
            info=req.html.xpath("//div[@class='tz-paragraph' and string-length(text())>1]//text()")
            print(info)
            elem="".join(info)
            # Replace each obfuscated character with its real counterpart.
            for fake,real in zip(self.utf8WordList,self.hanzi):
                if fake:  # "" would make str.replace insert between every char
                    elem = elem.replace(fake, real)
            print(elem)

        def parse(self,source):
            """Extract the font URL from the page source.

            Args:
                source: Raw HTML/CSS text of the page.

            Returns:
                The font URL, prefixed with ``http:`` unless it already starts
                with ``http``.

            Raises:
                ValueError: If no font URL is present in *source*.
            """
            match=self._FONT_URL_RE.search(source)
            if match is None:
                raise ValueError("font URL not found in page source")
            font_url=match.group(1)
            if not font_url.startswith("http"):
                font_url="http:"+font_url
            return font_url
    if __name__ == '__main__':
        # Script entry point: build the scraper and run a single fetch/decode pass.
        scraper = QiCheZhiJia()
        scraper.run()
    
  • 相关阅读:
    jmeter(46) redis
    jmeter(45) tcp/ip协议
    Codeforces Round #538 (Div. 2)D(区间DP,思维)
    Codeforces Global Round 1D(DP,思维)
    Educational Codeforces Round 57D(DP,思维)
    UPC11073(DP,思维)
    Yahoo Progamming Contest 2019D(DP,思维)
    Atcoder Beginner Contest 118D(DP,完全背包,贪心)
    Xuzhou Winter Camp 1C(模拟)
    Educational Codeforces Round 57 (Rated for Div. 2)D(动态规划)
  • 原文地址:https://www.cnblogs.com/c-x-a/p/9288841.html
Copyright © 2011-2022 走看看