#!/usr/bin/env python
# -*- coding:UTF-8 -*-
import requests
from selenium import webdriver
from bs4 import BeautifulSoup
import re
import time
class GetFansName:
    """Scrape the follower names listed under a profile URL into a text file.

    Workflow used by the script entry point: ``get_cookies`` opens Firefox
    with an existing profile to obtain cookies, ``add_cookies`` copies them
    into the HTTP session, ``get_fansNum`` works out how many follower pages
    exist, and ``get_fansName`` scrapes one page and appends the names found
    on it to ``fansNameFile``.

    Every public method is best-effort: failures are printed and swallowed
    so that one broken page does not stop the whole run.
    """

    # Followers per listing page; this is the divisor the original code
    # hard-coded when computing the number of pages.
    PAGE_SIZE = 45

    # Follower counter as shown in the navigation entry, e.g. "我的粉丝(123)".
    # Both ASCII and full-width parentheses are accepted.
    _FANS_COUNT_RE = re.compile(r"我的粉丝\s*[(（]\s*(\d+)\s*[)）]")

    def __init__(self, profiles, url, ses, sleepTime, fansNameFile):
        """Store the configuration values.

        Args:
            profiles: Path of the Firefox profile directory to load.
            url: Base profile URL; follower paths are appended to it.
            ses: HTTP session object used for the scraping requests
                (the entry point passes a ``requests`` session).
            sleepTime: Seconds to wait after the browser opens the page.
            fansNameFile: Path of the file the names are appended to.
        """
        self.profiles = profiles
        self.url = url
        self.ses = ses
        self.sleepTime = sleepTime
        self.fansNameFile = fansNameFile

    @classmethod
    def _parse_fans_count(cls, text):
        """Return the follower count embedded in *text*.

        Raises:
            ValueError: if *text* is empty or holds no recognisable counter.
        """
        match = cls._FANS_COUNT_RE.search(text or "")
        if match is None:
            raise ValueError("follower count not found in %r" % (text,))
        return int(match.group(1))

    @classmethod
    def _page_count(cls, fans_count, per_page=None):
        """Return how many listing pages are needed for *fans_count* followers.

        Uses ceiling division so an exact multiple of the page size does not
        produce a trailing empty page; there is always at least one page.
        """
        size = cls.PAGE_SIZE if per_page is None else per_page
        return max(1, -(-int(fans_count) // size))

    @staticmethod
    def _clean_name(raw):
        """Return *raw* with newlines turned into spaces and outer spaces removed."""
        if raw is None:
            return ""
        return raw.replace("\n", " ").strip(" ")

    def get_cookies(self):
        """Open the followers page in Firefox and return the browser cookies.

        Returns:
            The list of cookie dicts reported by the driver, or ``None`` when
            anything goes wrong (the error is printed).
        """
        driver = None
        try:
            # Load the configured Firefox profile.
            profile = webdriver.FirefoxProfile(self.profiles)
            driver = webdriver.Firefox(profile)
            driver.get(self.url + "/followers")
            time.sleep(self.sleepTime)
            return driver.get_cookies()
        except Exception as msg:
            print("get_cookies error:%s" % str(msg))
            return None
        finally:
            # Always shut the browser down, even when loading the page failed.
            if driver is not None:
                driver.quit()

    def add_cookies(self, cookies):
        """Copy the name/value pair of every cookie into the session's jar.

        Args:
            cookies: Iterable of dicts with ``"name"`` and ``"value"`` keys,
                as returned by ``get_cookies``. ``None`` or an empty value is
                reported and otherwise ignored.
        """
        if not cookies:
            print("add_cookies error:%s" % "no cookies to add")
            return
        try:
            for cookie in cookies:
                # Set each cookie straight on the session jar; no intermediate
                # jar is needed.
                self.ses.cookies.set(cookie["name"], cookie["value"])
        except Exception as msg:
            print("add_cookies error:%s" % str(msg))

    def get_fansNum(self):
        """Return the number of follower listing pages.

        Requests the followers page, reads the counter from the element with
        class ``current_nav`` and converts it into a page count.

        Returns:
            The page count (at least 1). Falls back to 1 when the request or
            the parsing fails; the error is printed.
        """
        try:
            # Request the followers page.
            response = self.ses.get(self.url + "/relation/followers")
            soup = BeautifulSoup(response.content, "html.parser")
            nav = soup.find(class_="current_nav")
            if nav is None:
                raise ValueError("element with class 'current_nav' not found")
            # get_text() also works when the element has nested children,
            # where .string would be None.
            fans_count = self._parse_fans_count(nav.get_text())
            print(u"我的粉丝数量:%s" % str(fans_count))
            pages = self._page_count(fans_count)
            print(u"总的分页:%s" % str(pages))
            return pages
        except Exception as msg:
            print("get_fansNum error:%s" % str(msg))
            return 1

    def get_fansName(self, fansnum):
        """Scrape one follower page and append the names to ``fansNameFile``.

        Args:
            fansnum: 1-based page number. Values ``<= 1`` request the
                followers URL without a ``page`` query parameter.
        """
        try:
            if fansnum <= 1:
                url_page = self.url + "/relation/followers"
            else:
                url_page = self.url + "/relation/followers?page=%s" % str(fansnum)
            print("正在抓取页面:%s" % url_page)
            # NOTE(review): verify=False disables TLS certificate checking for
            # this request (get_fansNum does not disable it). Kept to preserve
            # behaviour - confirm whether it is still required.
            response = self.ses.get(url_page, verify=False)
            soup = BeautifulSoup(response.content, "html.parser")
            cleaned = (
                self._clean_name(tag.get_text())
                for tag in soup.find_all(class_="avatar_name")
            )
            names = [name for name in cleaned if name]
            if names:
                # Open the output file once per page rather than once per name.
                with open(self.fansNameFile, 'a', encoding="utf-8") as out:
                    out.writelines(name + "\n" for name in names)
        except Exception as msg:
            print("get_fansName error:%s" % str(msg))
def main():
    """Run the scraper: fetch cookies via Firefox, then save every follower name."""
    # Path of the Firefox profile directory (raw string keeps the backslashes).
    profiles = r"C:\Users\Administrator\AppData\Roaming\Mozilla\Firefox\Profiles\wv0f79j4.default"
    # Profile URL whose followers are scraped.
    url = "https://home.cnblogs.com/u/NiceTime"
    # File the follower names are appended to.
    fansNameFile = "fansNameFile.txt"
    # Seconds to wait after the browser has opened the page.
    sleepTime = 5
    # One HTTP session is shared by all requests; the context manager closes
    # it when the run finishes.
    with requests.Session() as ses:
        fansName = GetFansName(profiles, url, ses, sleepTime, fansNameFile)
        cookies = fansName.get_cookies()
        fansName.add_cookies(cookies)
        fansNums = fansName.get_fansNum()
        # Listing pages are numbered from 1.
        for fansNum in range(1, fansNums + 1):
            fansName.get_fansName(fansNum)


if __name__ == '__main__':
    main()