zoukankan      html  css  js  c++  java
  • Matplotlib

    1. Matplotlib 是 Python 的绘图库。它可与 NumPy 一起使用，是 MATLAB 的一种有效的开源替代方案。
    
    import pandas as pd
    import altair as alt
    %matplotlib inline
    
    
    # Read the CSV, warning about malformed rows and skipping them instead of
    # aborting. `error_bad_lines` was deprecated in pandas 1.3 and removed in
    # 2.0; `on_bad_lines="warn"` is the equivalent of `error_bad_lines=False`
    # with the default warnings left on.
    df = pd.read_csv('pandas-2.csv', encoding="utf-8", delimiter=",", on_bad_lines="warn")
    # Index the rows by the parsed "time" column.
    df.set_index(pd.to_datetime(df["time"]), inplace=True)

    # df
    # Plot the 'throughput' column against the new index.
    df.loc[:, 'throughput'].plot()
    
    
    
    import matplotlib.pyplot as plt
    
    # Plot a single row (the row at position 5) across all columns.
    df.iloc[5].plot()
    plt.show()

    # Plot the first 10 rows, one line per row.
    # Rows are taken with head()/iterrows() rather than `df.iloc[label]`:
    # `df.index` yields labels, and passing labels to `iloc` only works when
    # the index happens to be the default 0..n-1 integers.
    max_rows = 10
    for row_label, row in df.head(max_rows).iterrows():
        row.plot(label=str(row_label))
    plt.legend()
    plt.show()


    # Plot a single column.
    df['A'].plot()
    plt.show()
    
    1. 简单的多个图(subplot)使用示例
    https://blog.csdn.net/leilei7407/article/details/104969532/
    
    import matplotlib.pyplot as plt
    import numpy as np
    # Demo data: five bar positions with random integer heights.
    # np.random.seed(0)  # uncomment for reproducible heights
    x = np.arange(5)
    y = np.random.randint(-5, 5, 5)

    # Left panel (1 row, 2 columns, slot 1): vertical bars plus a blue
    # horizontal line at 0.
    left_ax = plt.subplot(1, 2, 1)
    left_ax.bar(x, y, color='blue')
    left_ax.axhline(0, color='blue', linewidth=2)

    # Right panel (slot 2): barh swaps the roles of x and y, so the bars run
    # horizontally; add a red vertical line at 0.
    right_ax = plt.subplot(1, 2, 2)
    right_ax.barh(x, y, color='red')
    right_ax.axvline(0, color='red', linewidth=2)

    plt.show()
    
    
    1. 简单的多图叠加显示
    https://blog.csdn.net/leilei7407/article/details/104969532/
    
    # plt.legend()函数主要的作用就是给图加上图例
    
    import matplotlib.pyplot as plt
    
    # Shared x values and two series to overlay on the same axes.
    x = [1, 2, 3, 4, 5, 6, 7, 8]
    y1 = [1, 2, 3, 4, 55, 6, 6, 7]
    y2 = [6, 20, 9, 2, 5, 8, 2, 8]

    # Successive plot() calls draw onto the same axes.
    for series in (y1, y2):
        plt.plot(x, series)

    # Legend entries are given in plotting order.
    plt.legend(['y1', 'y2'])
    plt.show()
    
    
    
    1. 多层索引画图前需要转换
    
    import pandas as pd
    # Build a frame with a two-level row index.
    df = pd.DataFrame(
        np.arange(1, 5).reshape((4, 1)),
        index=[['a', 'b', 'c', 'd'], ['A', 'B', 'C', 'D']],
        columns=['data'],
    )

    # Optional: give the index levels names.
    df = df.rename_axis(index=['index1', 'index2'])

    # Flatten to a single-level index (the levels become ordinary columns) ...
    df = df.reset_index()

    # ... and turn those columns back into a two-level index.
    df.set_index(['index1', 'index2'], inplace=True)

    # Pivot the inner index level ('index2') into columns, filling gaps with 0,
    # then shorten some column names.
    # The result is stored as `plot_df`, not `plt`, so the matplotlib.pyplot
    # alias is not overwritten by a DataFrame.
    plot_df = df.unstack(fill_value=0)['data'].rename(
        columns={'A': 'aa', 'B': 'b', 'C': 'c'}
    )
    plot_df
    
    
    1. 百分比柱状图
    https://chrisalbon.com/python/data_visualization/matplotlib_percentage_stacked_bar_plot/
    https://blog.csdn.net/lys_828/article/details/106524459
    
    
    # 官方1(纵向)
    # libraries
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib import rc
    import pandas as pd
    
    # Data: bar positions and the raw value of each colour per group.
    r = [0, 1, 2, 3, 4]
    raw_data = {
        'greenBars': [20, 1.5, 7, 10, 5],
        'orangeBars': [5, 15, 5, 10, 15],
        'blueBars': [2, 15, 18, 5, 10],
    }
    df = pd.DataFrame(raw_data)

    # Convert every raw value into its percentage of the group's total.
    totals = [
        green + orange + blue
        for green, orange, blue in zip(df['greenBars'], df['orangeBars'], df['blueBars'])
    ]
    greenBars, orangeBars, blueBars = (
        [value / total * 100 for value, total in zip(df[column], totals)]
        for column in ('greenBars', 'orangeBars', 'blueBars')
    )

    # Plot: each layer starts where the layers below it end.
    barWidth = 0.85
    names = ('A', 'B', 'C', 'D', 'E')
    bar_style = dict(edgecolor='white', width=barWidth)
    # Green layer sits on the axis.
    plt.bar(r, greenBars, color='#b5ffb9', **bar_style)
    # Orange layer sits on top of green.
    plt.bar(r, orangeBars, bottom=greenBars, color='#f9bc86', **bar_style)
    # Blue layer sits on top of green + orange.
    green_plus_orange = [green + orange for green, orange in zip(greenBars, orangeBars)]
    plt.bar(r, blueBars, bottom=green_plus_orange, color='#a3acff', **bar_style)

    # Label the bars with the group names.
    plt.xticks(r, names)
    plt.xlabel("group")

    # Show the figure.
    plt.show()
    
    
    
    # 官方2(横向)
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Answer categories, in the order their counts appear in each list below.
    category_names = [
        'Strongly disagree',
        'Disagree',
        'Neither agree nor disagree',
        'Agree',
        'Strongly agree',
    ]
    # Question label -> answer counts, one count per category.
    results = {
        'Question 1': [10, 15, 17, 32, 26],
        'Question 2': [26, 22, 29, 10, 13],
        'Question 3': [35, 37, 7, 2, 19],
        'Question 4': [32, 11, 9, 15, 33],
        'Question 5': [21, 29, 5, 5, 40],
        'Question 6': [8, 19, 5, 30, 38],
    }
    
    def survey(results, category_names):
        """Draw a horizontal stacked bar chart of raw survey counts.

        Parameters
        ----------
        results : dict
            Maps each question label to a list of answer counts, one per
            category. All lists are assumed to have the same length, equal
            to the length of *category_names*.
        category_names : list of str
            The category labels, used as legend entries.

        Returns
        -------
        fig, ax
            The figure and axes created by ``plt.subplots``.
        """
        question_labels = list(results)
        counts = np.array([results[label] for label in question_labels])
        # Running total along each row = right edge of every segment.
        right_edges = counts.cumsum(axis=1)
        palette = plt.get_cmap('RdYlGn')(np.linspace(0.15, 0.85, counts.shape[1]))

        fig, ax = plt.subplots(figsize=(9.2, 5))
        ax.invert_yaxis()
        ax.xaxis.set_visible(False)
        ax.set_xlim(0, counts.sum(axis=1).max())

        for column, (category, rgba) in enumerate(zip(category_names, palette)):
            segment_widths = counts[:, column]
            segment_starts = right_edges[:, column] - segment_widths
            ax.barh(question_labels, segment_widths, left=segment_starts,
                    height=0.5, label=category, color=rgba)
            segment_centers = segment_starts + segment_widths / 2

            # Choose the annotation colour from the segment colour.
            red, green, blue, _alpha = rgba
            if red * green * blue < 0.5:
                text_color = 'white'
            else:
                text_color = 'darkgrey'
            for row, (center, count) in enumerate(zip(segment_centers, segment_widths)):
                ax.text(center, row, str(int(count)), ha='center', va='center',
                        color=text_color)
        ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
                  loc='lower left', fontsize='small')

        return fig, ax


    survey(results, category_names)
    plt.show()
    
    
    
    # 修改版本2(纵向)
    import numpy as np
    import matplotlib.pyplot as plt
    import pandas as pd
    
    
    def percentage_bar(df):
        """Draw a vertical 100%-stacked bar chart from *df*.

        Each row of *df* (index label) becomes one bar on the x axis and each
        column becomes one coloured segment of that bar. A segment's height
        is the column's share of the row total, and the share is written
        inside the segment as a percentage.

        Parameters
        ----------
        df : pandas.DataFrame
            Numeric table. Index labels name the bars; column names name the
            legend entries.

        Returns
        -------
        fig, ax
            The figure and axes created by ``plt.subplots``.
        """
        # Bar labels are forced to str so the x axis is categorical; the text
        # annotations below are placed at positions 0..n-1, which is where a
        # categorical axis puts the bars.
        labels = [str(label) for label in df.index]
        category_names = df.columns.tolist()
        # One row per segment (df column), one column per bar (df row).
        data = df.to_numpy(dtype=float).T

        # Share of each value in its bar's total, computed once. Bars whose
        # total is 0 get all-zero shares instead of NaN.
        totals = data.sum(axis=0)
        shares = np.divide(data, totals, out=np.zeros_like(data), where=totals != 0)

        # One colour per segment; swap the colormap name to restyle, e.g.
        # plt.get_cmap('hot')(np.linspace(0.15, 0.85, data.shape[0])).
        category_colors = plt.get_cmap('RdYlGn')(np.linspace(0.15, 0.85, data.shape[0]))

        fig, ax = plt.subplots(figsize=(12, 9))
        # Original author's note: the order can also be adjusted through the
        # ordering of the frame itself.
        ax.invert_xaxis()
        ax.yaxis.set_visible(False)
        ax.set_ylim(0, 1)

        # Height already stacked on each bar; the next segment starts here.
        bottoms = np.zeros(data.shape[1])
        for colname, color, heights in zip(category_names, category_colors, shares):
            ax.bar(labels, heights, bottom=bottoms, width=0.5, label=colname,
                   color=color, edgecolor='gray')
            centers = bottoms + heights / 2
            # A new array each time, so the one given to ax.bar is never mutated.
            bottoms = bottoms + heights

            # Choose the annotation colour from the segment colour.
            r, g, b, _ = color
            text_color = 'white' if r * g * b < 0.5 else 'k'
            for position, (center, share) in enumerate(zip(centers, heights)):
                ax.text(position, center, f'{round(share*100,2)}%', ha='center',
                        va='center', color=text_color, rotation=90)

        # Rotate the x tick labels after plotting. Calling set_xticklabels on
        # the empty axes attached the labels to whatever default ticks existed.
        ax.tick_params(axis='x', labelrotation=90)
        ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
                  loc='lower left', fontsize='large')
        return fig, ax
    
    
    # Answer categories; they become the index (rows) of the frame below.
    category_names = [
        'Strongly disagree',
        'Disagree',
        'Neither agree nor disagree',
        'Agree',
        'Strongly agree',
    ]
    # Question label -> answer counts, one count per category.
    results = {
        'Question 1': [10, 15, 17, 32, 26],
        'Question 2': [26, 22, 29, 10, 13],
        'Question 3': [35, 37, 7, 2, 19],
        'Question 4': [32, 11, 9, 15, 33],
        'Question 5': [21, 29, 5, 5, 40],
        'Question 6': [8, 19, 5, 30, 380],
    }

    # Rows = categories, columns = questions.
    df = pd.DataFrame(data=results, index=category_names)
    percentage_bar(df)
    # Set the x tick labels of the current axes back to horizontal.
    plt.xticks(rotation=0)
    
    
    
    
    # 修改版本2(横向)
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Answer categories; they become the index (rows) of the frame below.
    category_names = [
        'Strongly disagree',
        'Disagree',
        'Neither agree nor disagree',
        'Agree',
        'Strongly agree',
    ]
    # Question label -> answer counts, one count per category.
    results = {
        'Question 1': [20, 20, 20, 20, 20],
        'Question 2': [26, 22, 29, 10, 13],
        'Question 3': [35, 37, 7, 2, 19],
        'Question 4': [32, 11, 9, 15, 33],
        'Question 5': [21, 29, 5, 5, 40],
        'Question 6': [8, 19, 5, 30, 38],
    }

    # Rows = categories, columns = questions.
    df = pd.DataFrame(data=results, index=category_names)
    
    def survey(df):
        """Draw a horizontal 100%-stacked bar chart from *df*.

        Each column of *df* becomes one horizontal bar and each row (index
        label) becomes one coloured segment of it. A segment's width is that
        row's share of the column total, and the share is written inside the
        segment as a percentage.

        Parameters
        ----------
        df : pandas.DataFrame
            Numeric table. Index labels name the categories (legend entries);
            column names label the bars.

        Returns
        -------
        fig, ax
            The figure and axes created by ``plt.subplots``.
        """
        category_names = df.index.tolist()
        labels = df.columns.tolist()
        # One row per bar (df column), one column per category (df row).
        data = df.to_numpy(dtype=float).T

        # Convert to fractions of each bar's total. Bars whose total is 0 get
        # all-zero fractions instead of NaN.
        totals = data.sum(axis=1, keepdims=True)
        has_total = totals != 0
        # Right edge of every segment: the running sum divided by the total,
        # so the last segment of a non-empty bar ends at exactly 1.
        data_cum = np.divide(data.cumsum(axis=1), totals,
                             out=np.zeros_like(data), where=has_total)
        data = np.divide(data, totals, out=np.zeros_like(data), where=has_total)

        # One colour per category, evenly spaced over the colormap.
        category_colors = plt.get_cmap('RdYlGn')(
            np.linspace(0.15, 0.85, data.shape[1]))

        fig, ax = plt.subplots(figsize=(9.2, 5))
        ax.invert_yaxis()            # first bar at the top
        ax.xaxis.set_visible(False)  # hide the x axis
        ax.set_xlim(0, 1)            # every bar spans the whole 0..1 range

        for i, (colname, color) in enumerate(zip(category_names, category_colors)):
            widths = data[:, i]
            starts = data_cum[:, i] - widths
            # Draw this category's segment on every bar.
            ax.barh(labels, widths, left=starts, height=0.5,
                    label=colname, color=color)
            xcenters = starts + widths / 2
            # Write the percentage inside each segment, with a text colour
            # chosen from the segment colour.
            r, g, b, _ = color
            text_color = 'white' if r * g * b < 0.5 else 'darkgrey'
            for y, (x, c) in enumerate(zip(xcenters, widths)):
                ax.text(x, y, f'{round(c*100,2)}%', ha='center', va='center',
                        color=text_color)
        # Legend above the axes, one column per category.
        ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
                  loc='lower left', fontsize='small')

        return fig, ax


    survey(df)
    plt.show()
    
    1. 实际应用
    
    import pandas as pd
    import numpy as np
    import altair as alt
    import pymysql
    from sqlalchemy import create_engine
    import seaborn    
    import datetime
    import matplotlib.pyplot as plt
    
    
    def survey_percentage(df,T=False):
        """Draw a horizontal 100%-stacked bar chart from *df*.

        Each column of the (optionally transposed) frame becomes one
        horizontal bar and each row (index label) becomes one coloured
        segment of it. A segment's width is that row's share of the column
        total, and the share is written inside the segment as a percentage.

        Parameters
        ----------
        df : pandas.DataFrame
            Numeric table. Index labels name the categories (legend entries);
            column names label the bars.
        T : bool, default False
            When truthy, transpose *df* first, swapping the roles of rows
            and columns.

        Returns
        -------
        fig, ax
            The figure and axes created by ``plt.subplots``.
        """
        if T:
            df = df.T
        category_names = df.index.tolist()
        labels = df.columns.tolist()
        # One row per bar (df column), one column per category (df row).
        data = df.to_numpy(dtype=float).T

        # Convert to fractions of each bar's total. Bars whose total is 0 get
        # all-zero fractions instead of NaN.
        totals = data.sum(axis=1, keepdims=True)
        has_total = totals != 0
        # Right edge of every segment: the running sum divided by the total,
        # so the last segment of a non-empty bar ends at exactly 1.
        data_cum = np.divide(data.cumsum(axis=1), totals,
                             out=np.zeros_like(data), where=has_total)
        data = np.divide(data, totals, out=np.zeros_like(data), where=has_total)

        # One colour per category, evenly spaced over the colormap.
        category_colors = plt.get_cmap('RdYlGn')(
            np.linspace(0.15, 0.85, data.shape[1]))

        fig, ax = plt.subplots(figsize=(9.2, 5))
        ax.invert_yaxis()            # first bar at the top
        ax.xaxis.set_visible(False)  # hide the x axis
        ax.set_xlim(0, 1)            # every bar spans the whole 0..1 range

        for i, (colname, color) in enumerate(zip(category_names, category_colors)):
            widths = data[:, i]
            starts = data_cum[:, i] - widths
            # Draw this category's segment on every bar.
            ax.barh(labels, widths, left=starts, height=0.5,
                    label=colname, color=color)
            xcenters = starts + widths / 2
            # Write the percentage inside each segment, with a text colour
            # chosen from the segment colour.
            r, g, b, _ = color
            text_color = 'white' if r * g * b < 0.5 else 'darkgrey'
            for y, (x, c) in enumerate(zip(xcenters, widths)):
                ax.text(x, y, f'{round(c*100,2)}%', ha='center', va='center',
                        color=text_color)
        # Legend above the axes, one column per category.
        ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
                  loc='lower left', fontsize='small')

        return fig, ax
    
    
    
    def survey(df,T=False):
        """Draw a horizontal stacked bar chart of the raw values in *df*.

        Each column of the (optionally transposed) frame becomes one
        horizontal bar and each row (index label) becomes one coloured
        segment of it, annotated with its value truncated to an integer.

        Parameters
        ----------
        df : pandas.DataFrame
            Numeric table. Index labels name the categories (legend entries);
            column names label the bars.
        T : bool, default False
            When truthy, transpose *df* first.

        Returns
        -------
        fig, ax
            The figure and axes created by ``plt.subplots``.
        """
        frame = df.T if T else df
        category_names = frame.index.tolist()
        results = frame.to_dict(orient='list')

        bar_labels = list(results)
        values = np.array([results[label] for label in bar_labels])
        # Running total along each row = right edge of every segment.
        right_edges = values.cumsum(axis=1)
        palette = plt.get_cmap('RdYlGn')(np.linspace(0.15, 0.85, values.shape[1]))

        fig, ax = plt.subplots(figsize=(9.2, 5))
        ax.invert_yaxis()
        ax.xaxis.set_visible(False)
        ax.set_xlim(0, values.sum(axis=1).max())

        for column, (category, rgba) in enumerate(zip(category_names, palette)):
            segment_widths = values[:, column]
            segment_starts = right_edges[:, column] - segment_widths
            ax.barh(bar_labels, segment_widths, left=segment_starts,
                    height=0.5, label=category, color=rgba)
            segment_centers = segment_starts + segment_widths / 2

            # Choose the annotation colour from the segment colour.
            red, green, blue, _alpha = rgba
            if red * green * blue < 0.5:
                text_color = 'white'
            else:
                text_color = 'darkgrey'
            for row, (center, value) in enumerate(zip(segment_centers, segment_widths)):
                ax.text(center, row, str(int(value)), ha='center', va='center',
                        color=text_color)
        ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
                  loc='lower left', fontsize='small')

        return fig, ax
    
    
    
    
    # Load the report rows from the database.
    engine = create_engine('mysql+pymysql://root:password@ip:3306/db')
    sql = ''' select case_name ,result, platform_name, error_msg ,report_create_time from report_reportdetail; '''
    df = pd.read_sql_query(sql, engine)

    # Keep the last two weeks of data.
    # Index the rows by the parsed creation time and drop the now-redundant
    # column.
    report_times = pd.to_datetime(df["report_create_time"])
    df = df.set_index(report_times)
    df = df.drop("report_create_time", axis=1)
    # Cut-off date: 14 days before today.
    week_ago = datetime.date.today() - datetime.timedelta(days=14)
    # Sort by time, then discard everything before the cut-off.
    df = df.sort_index()
    df = df.truncate(before=week_ago)
    
    
    # Count failed results per error message, most frequent first.
    # SQL equivalent:
    #   select count(result) as i, error_msg from report_reportdetail
    #   where result = 'fail' group by error_msg order by i desc;
    is_fail = df['result'] == 'fail'
    ErrorMsg = (
        df.loc[is_fail, :]
        .groupby(['error_msg'])
        .count()
        .sort_values(by=['result'], ascending=[False])
    )


    # Failed cases: every platform (df0), HWE only (df1), HWV only (df2).
    df0 = df.loc[is_fail, :].copy()
    df1 = df.loc[is_fail & (df['platform_name'] == 'HWE'), :].copy()
    df2 = df.loc[is_fail & (df['platform_name'] == 'HWV'), :].copy()

    # The error messages are too long to use as keys: keep the first 60 chars.
    # Every frame derives the short message from its OWN rows. df0 used to copy
    # it from df1, so rows of other platforms never got their own message.
    for failed in (df0, df1, df2):
        failed.loc[:, "error_msg_short"] = failed["error_msg"].str[0:60]

    # Multi-level index.
    # df0 puts error_msg_short first; per the original author's note the
    # ordering stays stable this way, whereas
    # df0.set_index(['case_name', 'error_msg_short'], inplace=True)
    # would scramble it.
    df0.set_index(['error_msg_short', 'case_name'], inplace=True)
    df1.set_index(['case_name', 'error_msg_short'], inplace=True)
    df2.set_index(['case_name', 'error_msg_short'], inplace=True)

    # Group by all index levels and count the rows in each group.
    df0 = df0.groupby(level=df0.index.names).count()
    df1 = df1.groupby(level=df1.index.names).count()
    df2 = df2.groupby(level=df2.index.names).count()
    
    # Break the counts down by result.
    def _occurrences(counts, mask):
        """Return the rows of *counts* selected by *mask*, keeping only the
        'result' count column renamed to '出现次数' (number of occurrences)."""
        return counts.loc[mask, :].rename(columns={'result': '出现次数'}).loc[:, ['出现次数']]


    # Variant without a threshold, kept from the original for reference:
    # HW = _occurrences(df0, df0["result"] > 0).sort_values(by=['出现次数'], ascending=[False])

    # Error/case pairs seen more than 3 times, most frequent first.
    # The original computed this sorted table and then overwrote it with an
    # unsorted copy of the same rows; only the sorted version is kept.
    ErrorMsgByCase = _occurrences(df0, df0["result"] > 3).sort_values(
        by=['出现次数'], ascending=[False])

    # Per-platform split on the occurrence count.
    # NOTE(review): the HWE thresholds are complementary (>1 / <=1) but the HWV
    # ones overlap (>0 / <=3) - confirm this is intended.
    HWE = _occurrences(df1, df1["result"] > 1)
    HWV = _occurrences(df2, df2["result"] > 0)

    HWE_ENV = _occurrences(df1, df1["result"] <= 1)
    HWV_ENV = _occurrences(df2, df2["result"] <= 3)

    # Rows whose second index level contains "PR" followed by six digits and a
    # colon. A raw string is used because '\d' in a plain literal is an
    # invalid escape sequence.
    pr_pattern = r'PR\d{6}:'
    HWE_PR = df1.loc[df1.index.get_level_values(1).str.contains(pr_pattern)]
    HWV_PR = df2.loc[df2.index.get_level_values(1).str.contains(pr_pattern)]
    
    # ErrorMsg
    # ErrorMsgByCase
    # HW
    
    HWE
    # Pivot the inner index level into columns (missing combinations become 0)
    # and keep the '出现次数' block, then replace the long error messages with
    # short names for the legend.
    short_names = {
        ' DL Tput is below 400Mbps!': 'DL Tput error',
        ' UE 5G attach failed!': 'attach failed',
        ' UL Tput is below 20Mbps!': 'UL Tput error',
    }
    HWE_plt = HWE.unstack(fill_value=0)['出现次数'].rename(columns=short_names)
    HWE_plt.columns  # important to inspect (presumably to check the rename keys match)
    survey(HWE_plt, T=True)

    plt.show()
    
    
    # df.loc[['index1','index2'],:] 
    # HWV
    
    # HWV_PR
    # HWE_PR
    
    # HWE_ENV
    
    
    
    # data = ErrorMsgByCase.reset_index()
    # fg = seaborn.factorplot(y='case_name', x='出现次数',col='error_msg_short', data=data, kind='bar')
    # fg.fig.set_size_inches(18,3)
    # fg.set_xlabels('')
    
    
  • 相关阅读:
    代理
    博客园主题
    JS_1
    脚本语言
    Hadoop生态体系
    Hadoop序列化程序报错
    46. 全排列
    1038 Recover the Smallest Number (30分)
    1064 Complete Binary Search Tree (30分)
    1034 Head of a Gang (30分)
  • 原文地址:https://www.cnblogs.com/amize/p/13977878.html
Copyright © 2011-2022 走看看