##声明:
破解思路由同事提供, 原文章也是他整理的, 我拷贝了过来; 进一步完善、封装后的代码已附在文末。如需引用或转发, 请附上原文地址, 或征得我们两人中任意一人的同意, 谢谢~
打开一个页面, 发现字体文件地址是动态的, 这个倒是好说, 写个正则, 就可以动态匹配出来
先下载下来一个新页面的字体文件, 做一下对比, 如图
头脑风暴ing.gif
(与伙伴对话ing...)
不着急, 还是要冷静下来, 再想想哪里还有突破点
同一个页面的字体文件地址是动态的, 但是, 里面的字体编码和顺序是不会变的呀
可以使用某一个页面的字体文件做一个标准的字体映射表呀!
好像发现了新世界的大门, 可门还没开开, 就被自己堵死了, 就想 做出来映射表然后呢!(又要奔腾了)
想呀想呀想呀想, 最后叫上小伙伴一起想
突然就想到了, 虽然那么多不一样, 但是, 但是, 相同文字的坐标点相同呀 ! 突然又打开了大门
首先排除特别的文字的情况下, 只是在这个字体文件的情况下, 60%的字坐标点一样
那剩下的怎么办呢! 先不管了, 先把这60%给弄出来
def extract_ttf_file(self, file_name, get_word_map=True):
    """Read the glyph outlines (and optionally the glyph -> character map) of a font.

    Args:
        file_name: Path of the TTF file to open with ``TTFont``.
        get_word_map: When true, also build the glyph-name -> character map
            by pairing the glyph order with the hard-coded ``word_list``.
            This pairing is only meaningful for the reference font the
            list was written for.

    Returns:
        ``(utf_word_map, utf_coordinates_map)`` when ``get_word_map`` is
        true, otherwise only ``utf_coordinates_map``. Both are keyed by
        glyph name; the coordinates map holds each glyph's outline points
        as a list.

    Raises:
        IndexError: If ``get_word_map`` is true and the font has more
            glyphs than ``word_list`` has characters.
    """
    _font = TTFont(file_name)
    # The first glyph-order entry is skipped (presumably ".notdef" - confirm).
    uni_list = _font.getGlyphOrder()[1:]
    # Characters replaced by the custom font, in the reference font's glyph order.
    word_list = [
        "坏", "少", "远", "大", "九", "左", "近", "呢", "十", "高",
        "着", "矮", "八", "二", "右", "是", "得", "的", "小", "短",
        "很", "一", "了", "地", "好", "多", "七", "不", "长", "低",
        "三", "五", "六", "下", "更", "和", "四", "上",
    ]
    utf_word_map = {}
    utf_coordinates_map = {}
    for index, uni_code in enumerate(uni_list):
        if get_word_map:
            # Only index word_list when the caller asked for the word map, so
            # a font with a different glyph count can still be read.
            utf_word_map[uni_code] = word_list[index]
        utf_coordinates_map[uni_code] = list(_font['glyf'][uni_code].coordinates)
    if get_word_map:
        return utf_word_map, utf_coordinates_map
    return utf_coordinates_map


# Usage:
# self.local_utf_word_map, self.local_utf_coordinates_map = self.extract_ttf_file(self.local_ttf_name)
# Reference table built from one font file: it maps each glyph code of that
# font to the character it renders, i.e. {font_code: character}. Glyph codes
# of every newly downloaded font are resolved against this table.
# (The "..." below elides entries in this excerpt.)
font_rule = {
"edd2":"坏",...
"eca5":"四",
"ede5":"上"}
下载要破解的字体文件, 并替换标准编码字体映射表
会得到22个字的映射表(字体中一共38个字):
接下来, 就用坐标点来解决, 以下为思路
使用两点坐标差来判断, 但是这个偏差值拿不准
相同文字的坐标点数量必须一致; 在数量一致的候选中, 对应坐标点的距离平方之和最小的, 就认为是同一个字。
公式:(x1-x2)**2 + (y1-y2)**2
来先试试
然后再重组标准编码, 标准坐标, 新的编码和新坐标
(思路是: 找出最相近的坐标, 用新坐标找到对应的标准编码, 再用标准编码取出对应的文字, 最后替换成本页所用编码的映射表)
提取所有坐标点加起来最小的元素
替换, 生成新的标准映射表
在以上替换60%的字体映射表再加入一个判断, 改成如下
输出一个标准的坐标值, 这里我就不上图进行对比了, 经过对比, 发现没什么问题
# -*- coding: utf-8 -*-
# @Author: Mehaei
# @Date: 2020-01-10 14:51:53
# @Last Modified by: Mehaei
# @Last Modified time: 2020-01-13 10:10:13
import re
import os
import requests
from lxml import etree
from fontTools.ttLib import TTFont


class NotFoundFontFileUrl(Exception):
    """Raised when no font-file URL can be extracted from the page HTML."""


class CarHomeFont(object):
    """Decode text that a page hides behind a per-page custom TTF font.

    A reference font (``norm_font.ttf``) with a known glyph -> character
    table is compared with the font of the target page; glyphs are matched
    by their outline coordinates, and the resulting code -> character map is
    used to replace the ``&#x....;`` entities in the page HTML.
    """

    def __init__(self, url, *args, **kwargs):
        """Load the reference font, then download the page at *url* and its font.

        Extra positional and keyword arguments are accepted but not used.
        """
        self.local_ttf_name = "norm_font.ttf"
        self.download_ttf_name = 'new_font.ttf'
        self.new_unicode_map = {}
        self._making_local_code_map()
        self._download_ttf_file(url, self.download_ttf_name)

    def _download_ttf_file(self, url, file_name):
        """Fetch the page at *url*, locate its font file and save it as *file_name*.

        Also stores the page HTML in ``self.page_html``.

        Raises:
            NotFoundFontFileUrl: If the page contains no matching font URL.
            IOError: If the font file itself could not be downloaded.
        """
        self.page_html = self.download(url) or ""
        # Extract the font link. The pattern expects the form
        #   ,url('//host/path.ttf') format
        # The literal parentheses must be escaped; unescaped they are regex
        # groups and findall would return tuples instead of the URL.
        font_file_name = (re.findall(r",url\('(//.*?\.ttf)'\) format", self.page_html) or [""])[0]
        if not font_file_name:
            raise NotFoundFontFileUrl(" not found font file name ")
        # Download the font file.
        font_url = "https:%s" % font_file_name
        file_content = self.download(font_url, content=True)
        if file_content is None:
            # Fail before opening (and truncating) the target file.
            raise IOError("failed to download font file: %s" % font_url)
        # Save the font file locally.
        with open(file_name, 'wb') as f:
            f.write(file_content)
        print(" font file download success ")

    def _making_local_code_map(self):
        """Make sure the reference font exists locally and load its maps."""
        if not os.path.exists(self.local_ttf_name):
            # This URL is the page whose font serves as the reference font.
            # If it is changed, the character list in extract_ttf_file must
            # be updated by hand.
            url = "https://club.autohome.com.cn/bbs/thread/62c48ae0f0ae73ef/75904283-1.html"
            self._download_ttf_file(url, self.local_ttf_name)
        self.local_utf_word_map, self.local_utf_coordinates_map = self.extract_ttf_file(self.local_ttf_name)
        print(" local ttf load done ")

    def get_distence(self, norm_coordinate, new_coordinate):
        """Return the summed |dx| + |dy| distance between two outlines.

        Points are paired by index; *new_coordinate* must have at least as
        many points as *norm_coordinate*, otherwise ``IndexError`` is raised.
        """
        return sum(
            abs(new_coordinate[index][0] - point[0]) + abs(new_coordinate[index][1] - point[1])
            for index, point in enumerate(norm_coordinate)
        )

    def handle_subtraction(self, coordinate_equal_list):
        """Return the candidate whose two outlines are closest to each other.

        Each candidate is a dict holding the outlines under the keys
        ``' norm_coordinate '`` and ``' new_coordinate '``. On a tie the first
        candidate wins. An empty list raises ``ValueError``.
        """
        return min(
            coordinate_equal_list,
            key=lambda candidate: self.get_distence(
                candidate.get(' norm_coordinate '), candidate.get(' new_coordinate ')
            ),
        )

    def replace_ttf_map(self):
        """Match downloaded-font glyphs to reference glyphs and fill ``new_unicode_map``.

        For every reference glyph, the downloaded glyphs with the same number
        of outline points are candidates. A single candidate is taken as-is;
        among several, the one with the smallest distance is chosen.
        Reference glyphs without any candidate are left unmapped.
        """
        unicode_mlist_map = []
        new_utf_coordinates_map = self.extract_ttf_file(self.download_ttf_name, get_word_map=False)
        for local_unicode, local_coordinate in self.local_utf_coordinates_map.items():
            coordinate_equal_list = [
                {
                    " norm_key ": local_unicode,
                    " norm_coordinate ": local_coordinate,
                    " new_key ": new_unicode,
                    " new_coordinate ": new_coordinate,
                }
                for new_unicode, new_coordinate in new_utf_coordinates_map.items()
                if len(new_coordinate) == len(local_coordinate)
            ]
            if len(coordinate_equal_list) == 1:
                unicode_mlist_map.append(coordinate_equal_list[0])
            elif len(coordinate_equal_list) > 1:
                unicode_mlist_map.append(self.handle_subtraction(coordinate_equal_list))
        for unicode_dict in unicode_mlist_map:
            self.new_unicode_map[unicode_dict[" new_key "]] = self.local_utf_word_map[unicode_dict[" norm_key "]]
        print(" new unicode map extract success\n", self.new_unicode_map)

    def extract_ttf_file(self, file_name, get_word_map=True):
        """Read the glyph outlines (and optionally the glyph -> character map) of a font.

        Args:
            file_name: Path of the TTF file to open with ``TTFont``.
            get_word_map: When true, also pair the glyph order with the
                hard-coded ``word_list``; only meaningful for the reference
                font.

        Returns:
            ``(utf_word_map, utf_coordinates_map)`` when ``get_word_map`` is
            true, otherwise only ``utf_coordinates_map``; both keyed by
            glyph name.

        Raises:
            IndexError: If ``get_word_map`` is true and the font has more
                glyphs than ``word_list`` has characters.
        """
        _font = TTFont(file_name)
        # The first glyph-order entry is skipped (presumably ".notdef" - confirm).
        uni_list = _font.getGlyphOrder()[1:]
        # Characters replaced by the custom font, in the reference font's glyph order.
        word_list = [
            "坏", "少", "远", "大", "九", "左", "近", "呢", "十", "高",
            "着", "矮", "八", "二", "右", "是", "得", "的", "小", "短",
            "很", "一", "了", "地", "好", "多", "七", "不", "长", "低",
            "三", "五", "六", "下", "更", "和", "四", "上",
        ]
        utf_word_map = {}
        utf_coordinates_map = {}
        for index, uni_code in enumerate(uni_list):
            if get_word_map:
                # Skipped for the downloaded font, whose glyph count need not
                # match the length of word_list.
                utf_word_map[uni_code] = word_list[index]
            utf_coordinates_map[uni_code] = list(_font['glyf'][uni_code].coordinates)
        if get_word_map:
            return utf_word_map, utf_coordinates_map
        return utf_coordinates_map

    def repalce_source_code(self):
        """Return the page HTML with every mapped ``&#x....;`` entity replaced."""
        replaced_html = self.page_html
        for utf_code, word in self.new_unicode_map.items():
            # The first three characters of the glyph name are dropped
            # (presumably a "uni" prefix - confirm); the rest is the hex code.
            replaced_html = replaced_html.replace("&#x%s;" % utf_code[3:].lower(), word)
        return replaced_html

    def get_subject_content(self):
        """Return the text of the main post, decoded with ``new_unicode_map``."""
        normal_html = self.repalce_source_code()
        # Use XPath to select the main post.
        xp_html = etree.HTML(normal_html)
        subject_text = ''.join(xp_html.xpath(' //div[@xname="content"]//div[@class="tz-paragraph"]//text() '))
        return subject_text

    def download(self, url, *args, try_time=5, method="GET", content=False, **kwargs):
        """Request *url*, retrying up to *try_time* times.

        Args:
            url: Address to request.
            *args: Extra positional arguments passed to ``requests.request``.
            try_time: Maximum number of attempts.
            method: HTTP method name; upper-cased before use.
            content: Return ``response.content`` (bytes) instead of
                ``response.text``.
            **kwargs: Extra keyword arguments for ``requests.request``.
                ``headers`` is copied and given a browser User-Agent;
                ``timeout`` defaults to 10 seconds when not supplied.

        Returns:
            The response body, or ``None`` when every attempt failed or
            returned a non-OK status.
        """
        # Copy so the caller's headers dict is not modified.
        headers = dict(kwargs.get("headers") or {})
        headers.update({"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"})
        kwargs["headers"] = headers
        # Without a timeout a stalled connection would block forever.
        kwargs.setdefault("timeout", 10)
        while try_time > 0:
            # Count every attempt, including non-OK responses; counting only
            # exceptions made a persistent 4xx/5xx loop forever.
            try_time -= 1
            try:
                response = requests.request(method.upper(), url, *args, **kwargs)
            except requests.RequestException as e:
                print(" download error: %s " % e)
                continue
            if response.ok:
                return response.content if content else response.text
        return None


if __name__ == "__main__":
    url = "https://club.autohome.com.cn/bbs/thread/34d6bcc159b717a9/85794510-1.html#pvareaid=6830286"
    car = CarHomeFont(url)
    car.replace_ttf_map()
    text = car.get_subject_content()
    print(text)
1 import os
2
3 from fontTools.ttLib import TTFont
4 from utils.font_rule import font_rule
5 import logging
# Raise these two loggers to WARNING. The names must match exactly:
# a name padded with spaces configures a different, unused logger.
logging.getLogger("chardet").setLevel(logging.WARNING)
logging.getLogger("fontTools").setLevel(logging.WARNING)
8
9
def get_distance(t_lst, t_lst2):
    """Return the summed squared distance between two point lists.

    Points are paired by index; each pair contributes
    ``(x2 - x1) ** 2 + (y2 - y1) ** 2`` to the total.

    Args:
        t_lst: Sequence of ``(x, y)`` points; its length drives the loop.
        t_lst2: Sequence of ``(x, y)`` points with at least as many entries
            as ``t_lst``. Extra trailing points are ignored.

    Returns:
        The sum of the squared point-to-point distances (``0`` when
        ``t_lst`` is empty).

    Raises:
        IndexError: If ``t_lst2`` has fewer points than ``t_lst``.
    """
    return sum(
        (t_lst2[i][0] - el[0]) ** 2 + (t_lst2[i][1] - el[1]) ** 2
        for i, el in enumerate(t_lst)
    )
20
21
def handle_subtraction(lst):
    """Return the candidate whose old and new outlines are closest.

    Args:
        lst: Non-empty list of candidate dicts. Each dict holds the old
            glyph's points under the key ``' old_index '`` and the new
            glyph's points under ``' new_index '`` (key names include the
            surrounding spaces, matching the dicts built by
            ``get_font_map_lst``).

    Returns:
        The candidate with the smallest ``get_distance`` value; on a tie the
        first such candidate in ``lst``.

    Raises:
        ValueError: If ``lst`` is empty.
    """
    return min(
        lst,
        key=lambda d: get_distance(d.get(' old_index '), d.get(' new_index ')),
    )
38
39
def get_font_map_lst(old_font_lst, new_font_lst, old_font, new_font):
    """Pair each old-font glyph with its best match in the new font.

    A new glyph is a candidate for an old glyph only when both outlines have
    the same number of points. A single candidate is taken as-is; among
    several, ``handle_subtraction`` picks the closest one. Old glyphs with
    no candidate are left out of the result.

    Args:
        old_font_lst: Glyph names of the reference font.
        new_font_lst: Glyph names of the font to decode.
        old_font: Reference font; ``old_font['glyf'][name].coordinates`` must
            yield the outline points.
        new_font: Font to decode, accessed the same way.

    Returns:
        A list of dicts, one per matched old glyph. The key names
        (``' old_key '``, ``' new_key '``, ``' old_index '``, ``' new_index '``)
        include their surrounding spaces because ``handle_subtraction`` and
        ``get_font_map`` read them under exactly these names. The two
        ``*_key`` values are the last four characters of the glyph names;
        the two ``*_index`` values are the outline point lists.
    """
    # Read every new-font outline once instead of once per old glyph.
    new_outlines = [(j, list(new_font['glyf'][j].coordinates)) for j in new_font_lst]
    font_map_lst = []
    for el in old_font_lst:
        coordinates = list(old_font['glyf'][el].coordinates)
        u_lst = [
            {
                # Values are the bare 4-character codes: padding them would
                # make the later font_rule lookup miss.
                ' old_key ': el[-4:],
                ' new_key ': j[-4:],
                ' old_index ': coordinates,
                ' new_index ': new_coordinates,
            }
            for j, new_coordinates in new_outlines
            if len(new_coordinates) == len(coordinates)
        ]
        if len(u_lst) > 1:
            font_map_lst.append(handle_subtraction(u_lst))
        elif u_lst:
            font_map_lst.append(u_lst[0])
    return font_map_lst
58
59
def get_font_map(font_map_lst):
    """Translate matched glyph pairs into a new-code -> character map.

    Args:
        font_map_lst: List of match dicts carrying the reference glyph code
            under ``' old_key '`` and the new glyph code under ``' new_key '``
            (key names include the surrounding spaces).

    Returns:
        A dict mapping each lower-cased new glyph code to the character that
        ``font_rule`` lists for the matching lower-cased reference code.
        Matches whose reference code has no ``font_rule`` entry are printed
        and left out.
    """
    font_dic = {}
    for map_dic in font_map_lst:
        old_key = map_dic.get(' old_key ')
        new_key = map_dic.get(' new_key ')
        font = font_rule.get(old_key.lower())
        if not font:
            # Unknown reference code: report it and skip it. Storing
            # str(None) would put the literal text "None" into the map.
            print(map_dic)
            continue
        font_dic[new_key.lower()] = str(font)
    return font_dic
70
71
def decrypt_font(new_file_name):
    """Build the glyph-code -> character map for a downloaded font file.

    The reference font is read from ``old_font.ttf`` in the current
    directory, falling back to ``./utils/old_font.ttf``.

    Args:
        new_file_name: Path of the font file to decode.

    Returns:
        The dict produced by ``get_font_map`` for the matches found by
        ``get_font_map_lst``.
    """
    old_file_name = 'old_font.ttf'
    if not os.path.exists(old_file_name):
        old_file_name = './utils/old_font.ttf'
    # Context managers close both font files once the outlines are read.
    with TTFont(old_file_name) as old_font, TTFont(new_file_name) as new_font:
        # The first glyph-order entry of each font is skipped
        # (presumably ".notdef" - confirm).
        old_font_lst = old_font.getGlyphOrder()[1:]
        print(' [old_font_lst] ', old_font_lst)
        new_font_lst = new_font.getGlyphOrder()[1:]
        print(' [new_font_lst] ', new_font_lst)
        font_map_lst = get_font_map_lst(old_font_lst, new_font_lst, old_font, new_font)
        font_map = get_font_map(font_map_lst)
    return font_map
89
90
if __name__ == '__main__':
    # Decode the font stored as new.ttf and show the resulting map.
    # Tip: TTFont(path).saveXML('new.xml') dumps a font to XML, which makes
    # the glyph data easier to inspect by eye.
    new_file_name = 'new.ttf'
    font_map = decrypt_font(new_file_name)
    print(font_map)
    print(' [len_font_map] ', len(font_map))
进一步封装完善代码