# -*- coding:UTF-8 -*-
"""Scrape the AAAA statistics table and append every complete row to a file.

Each table row that yields exactly 16 values is turned into a dictionary and
appended to ``dns_status.txt`` as an indented JSON object.
"""
import json
import time

import requests
from bs4 import BeautifulSoup as bp

t3 = time.time()
ths = []  # holds threads (unused in the code below)

STATS_URL = 'http://www.employees.org/~dwing/aaaa-stats/'
OUTPUT_PATH = 'dns_status.txt'
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Output keys, in the order the values appear in a parsed table row.
_FIELD_NAMES = (
    'date',
    'total_checked',
    'with_A_records_count',
    'with_A_records_rate',
    'with_AAAA_records_count',
    'with_AAAA_records_rate',
    'AAAA_with_IPv4-mapped_count',
    'AAAA_with_IPv4-mapped_rate',
    'AAAA_with_loopback_count',
    'AAAA_with_loopback_rate',
    'valid_AAAA_records_count',
    'valid_AAAA_records_rate',
    'IPv6_connection_ok_count',
    'IPv6_connection_ok_rate',
    'IPv6_connection_failed_count',
    'IPv6_connection_failed_rate',
)


def _cell_values(cell):
    """Return the values contributed by the children of one ``<td>`` element."""
    values = []
    for child in cell:
        if not child:
            continue
        try:
            text = child.text
        except AttributeError:
            # Children without a ``.text`` attribute are stored unchanged.
            values.append(child)
            continue
        if not text:
            continue
        text = text.strip().strip(' ')
        if len(text) == 12:
            # A 12-character value is split after its 5th character into
            # "total checked" and "with A record".
            # NOTE(review): presumably two numbers fused together in the
            # page markup -- confirm against the live page.
            values.append(text[0:5])
            values.append(text[5:])
        else:
            values.append(text)
    return values


def _row_values(row):
    """Return the cleaned list of values for one ``<tr>``, or None to skip it."""
    values = []
    for cell in row.find_all('td'):
        values.extend(_cell_values(cell))

    # Drop the first blank entry and the first 'diffs' entry, if present.
    for unwanted in (' ', 'diffs'):
        if unwanted in values:
            values.remove(unwanted)

    if '(large run)' in values:
        return None
    if len(values) != len(_FIELD_NAMES):
        return None
    return values


def get(num):
    """Download the statistics page and append its complete rows as JSON.

    Args:
        num: Accepted for backward compatibility; not used.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    page = requests.post(STATS_URL, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()

    soup = bp(page.text, 'lxml')  # parse the HTML with the lxml parser

    serialized = []
    for row in soup.find_all('tr'):
        values = _row_values(row)
        if values is None:
            continue
        record = dict(zip(_FIELD_NAMES, values))
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        serialized.append(json.dumps(record, indent=2, ensure_ascii=False))

    if serialized:
        # Open the file once; the explicit encoding matches ensure_ascii=False.
        with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
            f.writelines(serialized)


if __name__ == '__main__':
    get(1)

    t4 = time.time()
    tt = t4 - t3
    print(tt)
# Results: see GitHub. (结果见 GitHub)