为了展示多种图表，数据采用了老师从 51Job 上爬取的数据。
以下是涉及到的代码段
数据库Dao层
# Database access class for job-position statistics.
class JobPositionDao(BaseDao):
    """Aggregate queries over ``job_position`` joined with ``job_collect_task``.

    Every public method runs one fixed SELECT statement and returns whatever
    ``self.fetch()`` yields for it (the row container type is decided by
    ``BaseDao``, which is not visible here).
    """

    def __init__(self):
        super().__init__()

    def _fetch_all(self, sql):
        """Execute *sql* without parameters, commit, and return the fetched rows."""
        self.execute(sql, params=None)
        # The original code commits after each SELECT; kept so that the
        # transaction handling seen by BaseDao is unchanged.
        self.commit()
        return self.fetch()

    # Average salary per language (collect task).
    def findPositionClassify(self):
        """Return rows of ``(avg_salary, job_taskid, task_title)``, one per task."""
        sql = (
            "select avg(job_meansalary),job_taskid,task_title "
            "from job_position,job_collect_task "
            "where job_position.job_taskid = job_collect_task.task_id "
            "group by job_taskid,task_title;"
        )
        return self._fetch_all(sql)

    # Average salary per language and city.
    def findCityPositionClassify(self):
        """Return rows of ``(avg_salary, job_taskid, task_title, job_city)``.

        Rows are ordered by task id ascending, then by average salary
        descending.
        """
        sql = (
            "select avg(t1.job_meansalary) as m,t1.job_taskid,t2.task_title,t1.job_city "
            "from job_position t1 "
            "left join job_collect_task t2 on t1.job_taskid = t2.task_id "
            "group by job_taskid,job_city,t2.task_title "
            "order by t1.job_taskid asc,m desc;"
        )
        return self._fetch_all(sql)

    # Number of newly published Python positions per publish date.
    def findPythonNewCountByDate(self):
        """Return rows of ``(count, task_title, FBTime)`` for task id 5, by date.

        The two tables are joined on ``jp.job_taskid = jc.task_id``. Without
        that predicate the query is a cartesian product: each count is
        multiplied by the number of rows in ``job_collect_task`` and the
        reported title is arbitrary. ``jc.task_title`` is also part of the
        GROUP BY so the statement is valid under ``ONLY_FULL_GROUP_BY``.
        NOTE(review): this assumes ``task_id`` is unique in
        ``job_collect_task`` -- confirm against the schema.
        """
        sql = (
            "SELECT COUNT(1) as count,jc.task_title,jp.FBTime "
            "FROM `job_position` as jp,`job_collect_task` as jc "
            "WHERE jp.job_taskid=5 AND jp.job_taskid=jc.task_id "
            "GROUP BY jp.job_taskid,jc.task_title,jp.FBTime "
            "ORDER BY jp.FBTime;"
        )
        return self._fetch_all(sql)
图表绘制
import matplotlib.pyplot as plt
import numpy as np

from day022.spiderproject.spiderproject.dao.jobpositiondao import JobPositionDao


class Chart():
    """Thin wrapper around ``matplotlib.pyplot`` that draws onto one figure.

    When constructed with ``shape=(rows, cols)`` every drawing method places
    its chart in cell ``count`` (1-based) of that grid; without a shape the
    charts are drawn on the figure's default axes.
    """

    def __init__(self, shape: tuple = None):
        # Use the SimHei font so the Chinese titles and labels can be rendered,
        # and disable the unicode minus sign for that font.
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['axes.unicode_minus'] = False
        self.__x, self.__y = shape or (None, None)
        plt.figure()

    def _select_cell(self, count):
        """Make grid cell *count* the current axes when a grid shape was given."""
        if self.__x and self.__y:
            # plt.subplot creates (or retrieves) the axes and makes it current.
            plt.subplot(self.__x, self.__y, count)

    def LineChart(self, dataList, colorList, labelList, xyMsg: tuple = None, title=None, count=1):
        """Draw one line per entry of *dataList*.

        dataList  -- iterable of ``(x_values, y_values)`` pairs
        colorList -- line colour for each series
        labelList -- legend label for each series
        xyMsg     -- optional ``(x_label, y_label)``
        title     -- optional chart title
        count     -- 1-based grid cell to draw into
        """
        self._select_cell(count)
        for series, color, label in zip(dataList, colorList, labelList):
            plt.plot(*series, c=color, label=label)
        if xyMsg:
            plt.xlabel(xyMsg[0])
            plt.ylabel(xyMsg[1])
        if title:
            plt.title(title)
        plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
        plt.legend(loc='best')

    def pieChart(self, dataList, labelList=None, title=None, count=1):
        """Draw a pie chart of *dataList* with optional slice labels and title."""
        self._select_cell(count)
        # autopct formats the percentage printed on every slice.
        plt.pie(dataList, labels=labelList, autopct="%1.2f")
        if title:
            plt.title(title)
        plt.axis('equal')

    def barChart(self, xData, yData, count=1, title=None):
        """Draw a bar chart and print each bar's value slightly above it."""
        self._select_cell(count)
        plt.bar(x=xData, height=yData)
        for x, value in zip(xData, yData):
            # The label sits 10 data units above the top of the bar.
            plt.text(x, value + 10, "{0}".format(value), ha="center")
        if title:
            plt.title(title)

    def show(self):
        """Display the figure."""
        plt.show()


# Line chart: data preparation and drawing.
def lineChart(jpDao: JobPositionDao, chartView: Chart):
    """Plot, per language, the average salary in every city all languages share.

    Rows from ``findCityPositionClassify`` are unpacked as
    ``(avg_salary, task_id, title, city)``. Because the sample is small, only
    cities present for *every* language are plotted so that all lines share
    the same x axis. Nothing is drawn when the query returns no rows.
    """
    rows = jpDao.findCityPositionClassify()
    if not rows:
        return

    # {title: [(city, salary rounded to two decimals), ...]}
    salariesByTitle = {}
    for mean, _taskId, title, city in rows:
        salariesByTitle.setdefault(title, []).append((city, round(mean, 2)))

    # Cities that occur for every language.
    commonCities = set.intersection(
        *({city for city, _ in pairs} for pairs in salariesByTitle.values())
    )

    dataList = []
    for pairs in salariesByTitle.values():
        # Filter by membership (not by position) so that cities sorting after
        # the last common city are dropped as well, then order by city name.
        kept = sorted(
            (pair for pair in pairs if pair[0] in commonCities),
            key=lambda pair: pair[0],
        )
        cities = [city for city, _ in kept]
        salaries = np.array([float(salary) for _, salary in kept], dtype=float)
        dataList.append([cities, salaries])

    labels = list(salariesByTitle)
    # Cycle through the palette so that a fifth language is not silently
    # dropped by zip() inside Chart.LineChart.
    palette = ['red', 'green', 'blue', 'black']
    colors = [palette[i % len(palette)] for i in range(len(labels))]

    chartView.LineChart(dataList, colors, labels,
                        xyMsg=("城市", "工资平均值"),
                        title="四种语言在各个城市中的平均工资")


def pieChart(jpDao: JobPositionDao, chartView: Chart):
    """Draw a pie chart of the overall average salary per language (cell 2).

    Rows from ``findPositionClassify`` are unpacked as
    ``(avg_salary, task_id, title)``; each slice is labelled
    ``"<title>:<salary>"``. Nothing is drawn when the query returns no rows.
    """
    rows = jpDao.findPositionClassify()
    if not rows:
        return

    # One value/label per result row (the original iterated a fixed four).
    means = [round(float(mean), 2) for mean, _taskId, _title in rows]
    labelList = [
        "{0}:{1}".format(title, mean)
        for (_mean, _taskId, title), mean in zip(rows, means)
    ]
    chartView.pieChart(np.array(means, dtype=float), labelList=labelList,
                       title="各个语言的总平均工资", count=2)


def barChart(jpDao: JobPositionDao, chartView: Chart):
    """Draw the number of new Python positions per publish date (cell 3).

    Rows from ``findPythonNewCountByDate`` are unpacked as
    ``(count, title, publish_date)``. Nothing is drawn when the query returns
    no rows.
    """
    rows = jpDao.findPythonNewCountByDate()
    if not rows:
        return

    dates = [publishDate for _count, _title, publishDate in rows]
    counts = np.array([int(count) for count, _title, _date in rows], dtype=int)
    chartView.barChart(dates, counts, count=3,
                       title="Python职位在不同日期的新需求岗位数")


jp = JobPositionDao()
chartView = Chart(shape=(2, 2))
lineChart(jp, chartView)
pieChart(jp, chartView)
barChart(jp, chartView)
chartView.show()
结果