"""
获取博客园本人的积分排名数据:
1. 抓包获取积分排名数据的返回接口:http://www.cnblogs.com/belle-ls/mvc/blog/sidecolumn.aspx?blogApp=belle-ls
2. 解析返回的数据,获取积分和排名
3. 开启线程,循环打印积分排名信息
"""
from bs4 import BeautifulSoup
import time
import requests
import threading
class BlogRankMonitor(object):
    """Fetch and print the cnblogs score and rank of one blog.

    The numbers are scraped from the blog's sidebar endpoint (see
    ``urlBasic``) and kept on the instance as ``score`` and ``rank``.
    """

    def __init__(self, id):
        """Remember which blog to monitor.

        :param id: blog identifier; it is substituted twice into the sidebar
            URL (path segment and ``blogApp`` query parameter).
        """
        # Template of the sidebar endpoint that returns the score/rank HTML.
        self.urlBasic = 'http://www.cnblogs.com/%s/mvc/blog/sidecolumn.aspx?blogApp=%s'
        self.id = id
        # Both start as int 0 and are replaced by the scraped text (str)
        # once a fetch succeeds.
        self.score = 0
        self.rank = 0

    def get_nums(self, blogs_des):
        """Return the value part of a ``label - value`` string.

        For example ``"积分 - 43"`` gives ``"43"``.

        :param blogs_des: text such as ``"积分 - 43"`` or ``"排名 - 283325"``.
        :return: the stripped text between the first and second ``-`` (str).
        :raises IndexError: if ``blogs_des`` contains no ``-``.
        """
        return blogs_des.split('-')[1].strip()

    def get_blog_ranks(self):
        """Download the sidebar HTML, update ``score``/``rank`` and print them.

        If an expected element is missing from the response, the previously
        stored value is kept and still printed.

        :return: None
        """
        url = self.urlBasic % (self.id, self.id)
        # Without a timeout a stalled connection would block this call forever.
        res = requests.get(url, timeout=10)
        soup = BeautifulSoup(res.text, "lxml")
        # NOTE(review): the sample run recorded with this script shows the rank
        # stuck at 0, so the rank text is presumably not inside the `liScore`
        # element. `liRank` is searched as well on the assumption that it is
        # the element holding the rank -- confirm against the live markup.
        for css_class in ("liScore", "liRank"):
            node = soup.find(class_=css_class)
            if node is None:
                continue
            # stripped_strings yields every text fragment below the node, so
            # nested tags cannot produce a None the way `.string` can.
            for text in node.stripped_strings:
                if '-' not in text:
                    continue
                if u'积分' in text:
                    self.score = self.get_nums(text)
                elif u'排名' in text:
                    self.rank = self.get_nums(text)
        # Report this instance's values rather than a module-level global.
        print("积分:", self.score, "排名:", self.rank, "时间:", time.strftime("%Y-%m-%d %X", time.localtime()))

    def start_score_rank_thread(self):
        """Run :meth:`get_blog_ranks` once in a new thread.

        :return: the started ``threading.Thread`` so callers may ``join`` it.
        """
        # Pass the bound method itself; calling it here would do the work in
        # the caller's thread and hand Thread a target of None.
        t = threading.Thread(target=self.get_blog_ranks)
        t.start()
        return t
if __name__ == '__main__':
    # Script entry point: keep refreshing the rank of one hard-coded blog
    # until the process is interrupted.
    poll_interval = 10  # seconds between two refreshes
    blog = BlogRankMonitor('belle-ls')
    while True:
        blog.start_score_rank_thread()
        # Wait before the next refresh so a line is printed about every 10 s.
        time.sleep(poll_interval)
"""
运行结果:
积分: 45 排名: 0 时间: 2019-02-12 15:57:22
积分: 45 排名: 0 时间: 2019-02-12 15:57:32
积分: 45 排名: 0 时间: 2019-02-12 15:57:42
积分: 45 排名: 0 时间: 2019-02-12 15:57:53
积分: 45 排名: 0 时间: 2019-02-12 15:58:03
....
"""