# Workflow followed by this script:
# 1. Define the goal.
# 2. Find the web page that holds the data.
# 3. Analyse the page structure to locate the tags that contain the data.
# 4. Simulate an HTTP request to the server and read back the HTML it returns.
# 5. Extract the data with regular expressions.
import re
from urllib import request

# Sample of the markup that is parsed (one block per streamer):
# <div class="video-info">
#     <span class="video-title" title="LPL半决赛 IG vs RNG">LPL半决赛 IG vs RNG</span>
#     <span class="video-nickname" title="LPL英文解说室">
#         <i class="icon-host-level icon-host-level-2" data-level="2"></i>
#         LPL英文解说室
#     </span>
#     <span class="video-number">32.7万</span>
#     <span class="video-station-info">
#         <i class="video-station-num">1人</i>
#     </span>
# </div>


class Splider():
    """Scrape a streamer listing page and print streamers ranked by viewers.

    The page at ``url`` is downloaded, every ``video-info`` block is cut out
    with ``root_pattern``, and the streamer name and viewer count are pulled
    from each block with ``name_pattern`` and ``number_pattern``.
    """

    url = 'https://www.panda.tv/cate/lol'
    # Raw strings: ``[\s\S]*?`` lazily matches any character, newlines included.
    root_pattern = r'<div class="video-info">([\s\S]*?)</div>'
    name_pattern = r'</i>([\s\S]*?)</span>'
    number_pattern = r'<span class="video-number">([\s\S]*?)</span>'
    # Seconds to wait for the server before giving up on the request.
    timeout = 10

    def __fetch_content(self):
        """Download the listing page and return it decoded as UTF-8 text."""
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(Splider.url, timeout=Splider.timeout) as response:
            return response.read().decode('utf-8')

    def __analysis(self, htmls):
        """Return one ``{'name': [...], 'number': [...]}`` dict per block.

        The values are the raw ``re.findall`` result lists for each
        ``video-info`` block found in ``htmls``.
        """
        return [
            {
                'name': re.findall(Splider.name_pattern, html),
                'number': re.findall(Splider.number_pattern, html),
            }
            for html in re.findall(Splider.root_pattern, htmls)
        ]

    def __refine(self, anchors):
        """Reduce the raw match lists to a stripped name and a number string.

        Entries for which either pattern matched nothing are skipped, because
        they cannot be ranked and would otherwise raise ``IndexError``.
        """
        refined = []
        for anchor in anchors:
            if not anchor['name'] or not anchor['number']:
                continue
            refined.append({
                'name': anchor['name'][0].strip(),
                'number': anchor['number'][0],
            })
        return refined

    def __sort(self, anchors):
        """Return ``anchors`` ordered by viewer count, largest first."""
        return sorted(anchors, key=self.__sort_seed, reverse=True)

    def __show(self, anchors):
        """Print one ``rank N:<name> <number>`` line per anchor."""
        for rank, anchor in enumerate(anchors, start=1):
            print('rank ' + str(rank) + ':' + anchor['name'] + ' ' + anchor['number'])

    def __sort_seed(self, anchor):
        """Return the viewer count of ``anchor`` as a float, for sorting.

        The first decimal number in ``anchor['number']`` is used. It is
        multiplied by 10000 when the text contains the unit character
        '万' (ten thousand). Text without any digits sorts as 0.0.
        """
        text = anchor['number']
        # Keep the fractional part so that '3.5万' ranks above '3.4万'.
        match = re.search(r'\d+(?:\.\d+)?', text)
        if match is None:
            return 0.0
        number = float(match.group())
        if '万' in text:
            number *= 10000
        return number

    def go(self):
        """Run the whole pipeline: fetch, parse, refine, sort and print."""
        htmls = self.__fetch_content()
        anchors = self.__analysis(htmls)
        anchors = list(self.__refine(anchors))
        anchors = self.__sort(anchors)
        self.__show(anchors)


splider = Splider()

# Only hit the network when run as a script, not when imported.
if __name__ == '__main__':
    splider.go()
# rank 1: 1715.0万 # rank 2:LPL英文解说室 38.8万 # rank 3:魔兽后裔 16.9万 # rank 4:守卫者 12.4万 # rank 5:药水哥s 5.2万 # rank 6:2d战衣托儿索 3.5万 # rank 7:三枪赵信 3.4万 # rank 8:一只呆萌娜 8932 # rank 9:小黑胖砸 8387 # rank 10:阿涛皎月Carry 6960 # rank 11:熊猫TV丶油菜花1 6700 # rank 12:熊猫丶蛮神 6398 # rank 13:小星k95 6343 # rank 14:老头一必诚 4347 # rank 15:狐狸酱大魔王 4127 # rank 16:杀鸡菜逼俱乐部 3367 # rank 17:一个很C的稻草人 3211 # rank 18:筱筱玉丶微服私访 3080 # rank 19:魔剑神无敌 3007 # rank 20:金三炮丶丶 2881 # rank 21:刀锋秀秀QAQ 2780 # rank 22:我是小二阿 2498 # rank 23:Roumm 2140 # rank 24:冰雪丶狐狸 2085 # rank 25:_木木不酷 1947 # rank 26:有毒i吸血鬼 1846 # rank 27:东北小伙_ 1784 # rank 28:琳琪baby 1780 # rank 29:皮小胖QAQ 1685 # rank 30:阿佑any 1668 # rank 31:美丽可爱栗子哟 1654 # rank 32:大表哥王者蛇女 1645 # rank 33:冷面寒枪人马神 1643 # rank 34:琦玉啊zzz 1639 # rank 35:熊猫TV老泽拉斯 1638 # rank 36:熊猫TV灬美猴王 1635 # rank 37:不会98K的传海 1619 # rank 38:分手何必把锅背走 1604 # rank 39:可乐解忧杂货铺 1569 # rank 40:二言是只喵c 1559 # rank 41:God_of_War龙宝宝 1553 # rank 42:骠骑大将军1 1546 # rank 43:暴力美学小毅 1536 # rank 44:可乐可乐大雪碧 1521 # rank 45:国际女特工 1520 # rank 46:醉梦醒酒 1500 # rank 47:阿毛君2 1499 # rank 48:请叫我越塔怪 1497 # rank 49:社会我墙哥丶 1494 # rank 50:Panda丶冰冰 1493 # rank 51:熊猫丶大风6 1493 # rank 52:初柔_ 1491 # rank 53:熊猫TV天倪 1467 # rank 54:芥子喵i 1464 # rank 55:或许这就是离岛吧 1460 # rank 56:熊猫尼古拉斯胖虎 1451 # rank 57:雁回阿 1446 # rank 58:这个人帅到没朋友 1446 # rank 59:西灬瓜酱 1438 # rank 60:长路漫漫剑圣作伴 1430 # rank 61:幼稚凯卡特 1430 # rank 62:李阿特 1403 # rank 63:小白菜嗷呜 1398 # rank 64:盒你相约_张小白 1388 # rank 65:小哇D卡 1349 # rank 66:武媚儿丶 1340 # rank 67:暴走的青蛙队长 1313 # rank 68:2017英雄联盟全明星 1309 # rank 69:熊猫TV木灵符 1306 # rank 70:酥软迷妹小慢慢Zz 1304 # rank 71:言希cc 1292 # rank 72:熊猫TV丶黑默丁宇 1274 # rank 73:零七Se7en丶 1270 # rank 74:疯疯大魔王 1270 # rank 75:笨蛋真嗣 1259 # rank 76:熊猫TVsao马 1251 # rank 77:诗琪baby_ 1249 # rank 78:小姑爷爷丶 1231 # rank 79:Sye钰神 1227 # rank 80:唐羽菲 1217 # rank 81:甜崽__ 1204 # rank 82:冠胖又帅又皮 1192 # rank 83:繁星yer 1184 # rank 84:顺顺套路王 1181 # rank 85:Panda丶浅唱小生 1175 # rank 86:你的温岚 1169 # rank 87:苏璞呀丶 1159 # rank 88:涛涛段r 1121 # rank 89:糖小鱼丶丶 1110 # rank 90:我就是神仙丶丶 1092 # rank 91:熊猫TV丶sao白 1031 # 
# rank 92:丨空城忆旧巷丨 1019
# rank 93:愿卿好 951
# rank 94:C哥哔哩罢了丶 943
# rank 95:周龍丶 925
# rank 96:熊猫TV桂林168 902
# rank 97:迟到不准时的岛屿 900
# rank 98:嘿we狗丶老污狗 896
# rank 99:努力的杰欧巴 886
# rank 100:我真的没有钱畫訫 841
# rank 101:裙裙裙子 811
# rank 102:Dyz8 795
# rank 103:喵菌i 788
# rank 104:电竞歌后小莲莲丶 785
# rank 105:人生如戏丶戏如命 781
# rank 106:熊猫LCS直播间 764
# rank 107:战士狂魔天 754
# rank 108:忘忧人云彩 749
# rank 109:熊猫第一姬 734
# rank 110:阿祥Q 724
# rank 111:灵魂纯白 722
# rank 112:林如风1 705
# rank 113:熊猫壹棉被 698
# rank 114:熊猫Tv丶K88 690
# rank 115:俊姑豹女丶丶 678
# rank 116:isme明非 674
# rank 117:熊猫tv胖胖虎 671