zoukankan      html  css  js  c++  java
  • pandas:数据可视化

    普通柱状图

    '''
    Plain bar chart: number of international students per field,
    drawn from the largest value to the smallest.
    '''

    import pandas as pd
    import matplotlib.pyplot as plt

    file = '/tmp/Students2.xlsx'
    student = pd.read_excel(file)
    # Sort descending by 'Number' so the bars appear from tallest to shortest.
    student_filter = student.sort_values(by='Number', ascending=False)
    print(student_filter)
    plt.bar(student_filter.Field, student_filter.Number, color='orange')
    # rotation must be numeric (or 'vertical'/'horizontal'); the string '90'
    # is rejected with a ValueError by current matplotlib releases.
    plt.xticks(student_filter.Field, rotation=90)
    plt.xlabel('Field')
    plt.ylabel('Number')
    # Numeric font size instead of the string '16'.
    plt.title('International student by field', fontsize=16)
    plt.tight_layout()
    plt.show()

    # Alternative: pandas' built-in plotting method.
    # student_filter.plot.bar(x='Field',y='Number',color='orange',title='International student by field')
    # plt.show()

    分组柱状图

    '''
    Grouped bar chart: the '2016' and '2017' columns side by side for
    each field, ordered by the '2017' column in descending order.
    '''

    import pandas as pd
    import matplotlib.pyplot as plt

    file = '/tmp/Students3.xlsx'
    student = pd.read_excel(file)
    student_filter = student.sort_values(by='2017', ascending=False)
    print(student_filter)

    # Earlier attempt with plain matplotlib, kept for reference:
    # plt.bar(student_filter.Field,[2017,2016],color=['orange','red'])
    # plt.show()
    year_columns = ['2016', '2017']
    bar_colors = ['orange', 'red']
    student_filter.plot.bar(x='Field', y=year_columns, color=bar_colors)

    # Titles and axis labels.
    plt.title('International Students by Field', fontsize=16)
    plt.xlabel('Field', fontweight='bold')
    plt.ylabel('Number', fontweight='bold')

    # Slant the tick labels and right-align them on their ticks.
    current_axes = plt.gca()
    current_axes.set_xticklabels(student_filter['Field'], rotation=40, ha='right')

    # Widen the left and bottom margins of the figure.
    current_figure = plt.gcf()
    current_figure.subplots_adjust(left=0.2, bottom=0.42)
    plt.show()

    叠加柱状图-横向叠加柱状图

    '''
    Stacked bar chart (with a horizontal variant left commented out):
    per-user values for Oct, Nov and Dec stacked into a single bar,
    users ordered by their three-month total, largest first.
    '''

    import pandas as pd
    import matplotlib.pyplot as plt

    file = '/tmp/Users.xlsx'
    users = pd.read_excel(file)

    # Add the three-month total, then order the rows by it (descending).
    users['Total'] = users.Oct + users.Nov + users.Dec
    users = users.sort_values(by='Total', ascending=False)
    print(users)

    month_columns = ['Oct', 'Nov', 'Dec']
    users.plot.bar(x='Name', y=month_columns, stacked=True)
    # Horizontal stacked variant:
    # users.plot.barh(x='Name',y=['Oct','Nov','Dec'],stacked=True)
    plt.tight_layout()
    plt.show()

    饼状图

    '''
    Pie chart of the '2017' column, one slice per 'From' value.
    '''

    import pandas as pd
    import matplotlib.pyplot as plt

    file = '/tmp/Students.xlsx'
    # Use the column whose values should label the slices as the index.
    students = pd.read_excel(file, index_col='From')
    print(students)

    # NOTE(review): the original comment here read "sort by the 2017 column",
    # but no sort is performed; slices follow the row order of the sheet.
    column_2017 = students['2017']
    column_2017.plot.pie(fontsize=8, counterclock=False, startangle=-270)

    plt.title('Source of International Students', fontsize=16, fontweight='bold')
    plt.ylabel('2017', fontsize=12, fontweight='bold')
    plt.show()

    曲线图-叠加曲线图

    '''
    Line chart / stacked area chart of weekly values for four
    product categories, indexed by the 'Week' column.
    '''

    import pandas as pd
    import matplotlib.pyplot as plt

    file = '/tmp/Orders.xlsx'
    weeks = pd.read_excel(file, index_col='Week')
    print(weeks)

    category_columns = ['Accessories', 'Bikes', 'Clothing', 'Components']

    # Plain line chart variant:
    # weeks.plot(y=['Accessories', 'Bikes', 'Clothing', 'Components'])
    weeks.plot.area(y=category_columns)

    plt.title('Sales Trends', fontsize=16, fontweight='bold')
    # Place one tick at every value of the 'Week' index.
    plt.xticks(weeks.index, fontsize=8)
    plt.show()

    密度图-离散图-直方图

    '''
    Scatter plot, density (KDE) plot and histogram of the home data.
    Only the price histogram is active; the other plots are left
    commented out as alternatives.
    '''

    import pandas as pd
    import matplotlib.pyplot as plt

    # Raise the column display limit so print() does not elide columns.
    pd.options.display.max_columns = 999
    file = '/tmp/home_data.xlsx'
    homes = pd.read_excel(file)
    print(homes.head())

    # Scatter plot (the original comment mislabelled this as the density plot)
    # homes.plot.scatter(x='sqft_living',y='price')

    # Density (KDE) plot (the original comment mislabelled this as the scatter plot)
    # homes.sqft_living.plot.kde()

    # Histogram
    homes.price.plot.hist(bins=200)
    # range() only accepts integers; cast the maximum so a price column
    # that was read as float does not raise TypeError.
    price_upper = int(homes.price.max())
    plt.xticks(range(0, price_upper, 100000), fontsize=8, rotation=90)
    # homes.sqft_living.plot.hist(bins=100)
    # plt.xticks(range(0,max(homes.sqft_living),500),fontsize=8,rotation=90)
    plt.show()


    # Pairwise correlation between the columns
    # print(homes.corr())
  • 相关阅读:
    对象遍历 for in ,数组遍历for in 与 for of 的区别
    计算一个数组中key值相同的数量
    VUE的两种跳转push和replace对比区别
    微信公众号二次分享ios分享失败问题
    获得对象中的键或值
    第一个table根据checkbox选择tr,在另一个table中显示对应索引的tr(jq遍历的运用)
    checkbox 全选反选 获得所有的checkbox
    为什么jQuery要return this.each()?
    用jq代码写出一个轮播图。
    页面滚动到一定位置,两个div 朝中间运动。
  • 原文地址:https://www.cnblogs.com/soymilk2019/p/13862948.html
Copyright © 2011-2022 走看看