zoukankan      html  css  js  c++  java
  • pyecharts数据分析及展示

    仅仅从网上爬下数据当然是不够的,还需要对数据进行分析与展示。大部分人都看重薪资,但薪资数据的单位并不统一:有的是 *k/月,有的是 *万/月,还有 *万/年 等等,因此需要先对数据进行清洗。

    先将所有薪资换算成统一单位,然后划分薪资范围,再计算各个范围内的数量,最后绘图展示。

    import pymysql
    import numpy as np
    from pyecharts import Bar
    from pyecharts import Pie
    
    
    class Mysqlhelper(object):
        """Small helper that runs a query against MySQL through pymysql.

        Every call to getlist() opens a fresh connection and closes it again
        before returning, so an instance can be reused for several queries.
        """

        # Keyword arguments handed unchanged to pymysql.connect().
        # NOTE(review): credentials are hard-coded here; move them to a config
        # file or environment variables before sharing this script.
        config = {
            "host": "localhost",
            "user": "root",
            "password": "123456",
            "db": "test",
            "charset": "utf8"
        }

        def __init__(self):
            # Both handles are None whenever no query is in flight.
            self.connection = None
            self.cursor = None

        def getlist(self, sql, *args):
            """Run *sql* and return all rows of its result set.

            Args:
                sql: Query text, passed to cursor.execute().
                *args: Values passed to cursor.execute() alongside *sql*.

            Returns:
                Whatever cursor.fetchall() returns, or an empty tuple when
                connecting or querying fails (the error is printed), so the
                result can always be iterated.
            """
            try:
                self.connection = pymysql.connect(**Mysqlhelper.config)
                self.cursor = self.connection.cursor()
                self.cursor.execute(sql, args)
                return self.cursor.fetchall()
            except Exception as ex:
                # Best effort: report the failure instead of raising.
                print(ex)
                return ()
            finally:
                self.close()

        def close(self):
            """Close the cursor and the connection and forget both handles.

            The handles are cleared before closing so that a second call (or a
            later getlist() whose connect() fails) never closes them again.
            """
            cursor, self.cursor = self.cursor, None
            connection, self.connection = self.connection, None
            try:
                if cursor is not None:
                    cursor.close()
            finally:
                # Release the connection even if closing the cursor failed.
                if connection is not None:
                    connection.close()
    
    
    # Upper bound (yuan per month) of each salary bucket; bucket k covers the
    # half-open range (previous bound, SALARY_BOUNDS[k]].
    SALARY_BOUNDS = [5000, 10000, 15000, 20000, 25000, 30000, 35000, 40000]

    # NOTE(review): in the published listing every one-character literal these
    # checks compared against was an empty string (so no row could ever match);
    # the characters were presumably lost when the article was copied.  The
    # values below are inferred from the article text ("*万/月", "*万/年") and
    # from the multipliers 10000 / 1000 -- confirm against the data in t_job.
    MONTH_SUFFIX = '月'
    UNIT_MULTIPLIERS = {'万': 10000, '千': 1000}

    # Categories shown in the per-district charts, in display order.
    CITY_NAMES = ['北京', '异地招聘', '海淀区', '朝阳区', '丰台区', '昌平区', '东城区', '延庆区',
                  '房山区', '通州区', '顺义区', '大兴区', '怀柔区', '西城区', '平谷区', '门头沟区']

    # The per-district charts were disabled in the original script; flip this
    # switch to render them as well.
    RENDER_CITY_CHARTS = False


    def parse_monthly_salary(text):
        """Return the average monthly salary in yuan described by *text*.

        *text* is expected to look like "<low>-<high><unit>/<period>".  Only
        entries whose last character is MONTH_SUFFIX and whose unit is a key
        of UNIT_MULTIPLIERS are converted; a single value without "-" counts
        as both the low and the high end.

        Returns:
            The mean of the low and high end as a float, or None when the
            entry is empty, not monthly, uses an unknown unit, or is not
            numeric.
        """
        if not text or text[-1] != MONTH_SUFFIX:
            return None
        amount = text.split('/')[0]
        if not amount:
            return None
        multiplier = UNIT_MULTIPLIERS.get(amount[-1])
        if multiplier is None:
            return None
        parts = amount[:-1].split('-')
        try:
            low = float(parts[0]) * multiplier
            high = float(parts[1]) * multiplier if len(parts) > 1 else low
        except ValueError:
            return None
        return (low + high) / 2


    def count_by_upper_bound(values, upper_bounds):
        """Count how many *values* fall into each bucket.

        Bucket k holds the values v with previous_bound < v <= upper_bounds[k]
        (the first bucket starts just above 0).  Values that are not positive
        or that exceed the last bound are not counted, as in the original
        if/elif chain.
        """
        counts = [0] * len(upper_bounds)
        for value in values:
            if value <= 0:
                continue
            for index, bound in enumerate(upper_bounds):
                if value <= bound:
                    counts[index] += 1
                    break
        return counts


    def district_of(location):
        """Return the part after the first '-' of *location*, or all of it."""
        parts = location.split('-')
        return parts[0] if len(parts) == 1 else parts[1]


    def render_salary_charts(rows):
        """Bucket the monthly salaries found in column 4 and draw bar + pie."""
        averages = []
        for row in rows:
            average = parse_monthly_salary(row[4])
            if average is not None:
                averages.append(average)
        if not averages:
            # min()/max() would raise on an empty list and the charts would
            # be empty anyway.
            print('no monthly salary entries found')
            return
        print(min(averages))
        print(max(averages))

        counts = count_by_upper_bound(averages, SALARY_BOUNDS)
        print(SALARY_BOUNDS)   # x axis
        print(counts)          # number of postings per bucket

        bar = Bar('Python平均工资')
        bar.add("月薪", SALARY_BOUNDS, counts)
        bar.render('Python工资柱状图.html')

        pie = Pie()
        pie.add("", SALARY_BOUNDS, counts, is_label_show=True)
        pie.render('Python工资饼状图.html')


    def render_city_charts(rows):
        """Count postings per entry of CITY_NAMES (column 3) and draw pie + bar."""
        tally = dict.fromkeys(CITY_NAMES, 0)
        for row in rows:
            name = district_of(row[3])
            # Locations outside CITY_NAMES are ignored.
            if name in tally:
                tally[name] += 1
        counts = [tally[name] for name in CITY_NAMES]
        print(CITY_NAMES)
        print(counts)

        pie = Pie()
        pie.add("", CITY_NAMES, counts, is_label_show=True)
        pie.render('北京各区Python职位占比饼状图.html')

        bar = Bar('北京各区职位数量')
        bar.add("数量", CITY_NAMES, counts)
        bar.render('北京各区Python职位占比柱状图.html')


    def main():
        """Load every row of t_job and render the charts."""
        helper = Mysqlhelper()
        # A failed query yields no usable rows; fall back to an empty result.
        rows = helper.getlist("select * from t_job") or ()
        render_salary_charts(rows)
        if RENDER_CITY_CHARTS:
            render_city_charts(rows)


    if __name__ == "__main__":
        main()

    前面的 Mysqlhelper 是数据库操作类,其实可以单独封装成一个 py 文件,以后使用时直接导入调用即可。

    结果如下:

    我也分析了 boss 直聘网站的一些数据,例如经验要求和学历要求等,大家也可以自行分析想要的数据。

    import pymysql
    import numpy as np
    from pyecharts import Bar
    from pyecharts import Pie
    import jieba
    from collections import Counter
    from os import  path
    
    class Mysqlhelper(object):
        """Small helper that runs a query against MySQL through pymysql.

        Every call to getlist() opens a fresh connection and closes it again
        before returning, so an instance can be reused for several queries.
        """

        # Keyword arguments handed unchanged to pymysql.connect().
        # NOTE(review): credentials are hard-coded here; move them to a config
        # file or environment variables before sharing this script.
        config = {
            "host": "localhost",
            "user": "root",
            "password": "123456",
            "db": "test",
            "charset": "utf8"
        }

        def __init__(self):
            # Both handles are None whenever no query is in flight.
            self.connection = None
            self.cursor = None

        def getlist(self, sql, *args):
            """Run *sql* and return all rows of its result set.

            Args:
                sql: Query text, passed to cursor.execute().
                *args: Values passed to cursor.execute() alongside *sql*.

            Returns:
                Whatever cursor.fetchall() returns, or an empty tuple when
                connecting or querying fails (the error is printed), so the
                result can always be iterated.
            """
            try:
                self.connection = pymysql.connect(**Mysqlhelper.config)
                self.cursor = self.connection.cursor()
                self.cursor.execute(sql, args)
                return self.cursor.fetchall()
            except Exception as ex:
                # Best effort: report the failure instead of raising.
                print(ex)
                return ()
            finally:
                self.close()

        def close(self):
            """Close the cursor and the connection and forget both handles.

            The handles are cleared before closing so that a second call (or a
            later getlist() whose connect() fails) never closes them again.
            """
            cursor, self.cursor = self.cursor, None
            connection, self.connection = self.connection, None
            try:
                if cursor is not None:
                    cursor.close()
            finally:
                # Release the connection even if closing the cursor failed.
                if connection is not None:
                    connection.close()
    
    # Chart categories, in display order.
    EXPERIENCE_LABELS = ['应届生', '1年以内', '1-3年', '3-5年', '5-10年', '经验不限']
    EDUCATION_LABELS = ['大专', '本科', '硕士', '博士', '学历不限']

    # The experience charts were disabled in the original script; flip this
    # switch to render them as well.
    RENDER_EXPERIENCE_CHARTS = False


    def count_labels(values, labels):
        """Count exact occurrences of each label in *values*.

        Counting by exact match keeps every count aligned with its own label,
        so a chart category can never show the total of a different one.

        Returns:
            A pair (counts, leftovers): counts is a list aligned with
            *labels*; leftovers is a Counter of the values that match no
            label, so they can be reported instead of silently vanishing.
        """
        tally = Counter(values)
        known = set(labels)
        counts = [tally[label] for label in labels]
        leftovers = Counter(
            {value: seen for value, seen in tally.items() if value not in known}
        )
        return counts, leftovers


    def main():
        """Load boss_job and chart the experience / education requirements."""
        helper = Mysqlhelper()
        # A failed query yields no usable rows; fall back to an empty result.
        rows = helper.getlist("select * from boss_job") or ()

        exp = [row[5] for row in rows]   # experience requirement column
        edu = [row[6] for row in rows]   # education requirement column
        print(exp)
        print(edu)
        print(len(exp))
        print(len(edu))

        expcount, exp_other = count_labels(exp, EXPERIENCE_LABELS)
        print(expcount)
        print(dict(exp_other))   # experience values not shown in the chart

        educount, edu_other = count_labels(edu, EDUCATION_LABELS)
        print(educount)
        print(dict(edu_other))   # education values not shown in the chart

        if RENDER_EXPERIENCE_CHARTS:
            bar = Bar('工作年限')
            bar.add("要求", EXPERIENCE_LABELS, expcount)
            bar.render('工作年限柱状图.html')

            pie = Pie()
            pie.add("工作", EXPERIENCE_LABELS, expcount, is_label_show=True)
            pie.render('工作年限饼状图.html')

        bar = Bar('学历要求')
        bar.add("学历", EDUCATION_LABELS, educount)
        bar.render('学历要求柱状图.html')

        pie = Pie()
        pie.add("学历", EDUCATION_LABELS, educount, is_label_show=True)
        pie.render('学历要求饼状图.html')


    if __name__ == "__main__":
        main()

    我使用的是最基本的列表计数方法,不知道有没有更简单的方法,例如使用 jieba 分词模块等。

    可以看出,本科生的需求还是很大的……

  • 相关阅读:
    Python 编程快速上手 第八章总结
    Python 编程快速上手 第七章总结
    Python 编程快速上手 第六章总结
    Python 编程快速上手 第五章总结
    Processing 与 C 相同和不同的地方
    learn python the hard way习题31~40总结以及列表的扩展知识
    while循环
    初识python
    第九章 类
    第八章 函数
  • 原文地址:https://www.cnblogs.com/yuxuanlian/p/9838657.html
Copyright © 2011-2022 走看看