- Matplotlib 是 Python 的绘图库。它可与 NumPy 一起使用,提供了一种有效的 MATLAB 开源替代方案。
import pandas as pd
import altair as alt
%matplotlib inline
# Load the CSV and skip malformed rows instead of raising on them.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines="skip"` is the supported replacement.
df = pd.read_csv('pandas-2.csv', encoding="utf-8", delimiter=",", on_bad_lines="skip")
# Use the parsed "time" column as the index (the column itself is kept).
df = df.set_index(pd.to_datetime(df["time"]))
# df
# Plot the "throughput" column against the datetime index.
df.loc[:, 'throughput'].plot()
import matplotlib.pyplot as plt
# Plot a single row (the row at position 5).
df.iloc[5].plot()
plt.show()
# Plot the first 10 rows, one line per row.
# `.iloc` is positional, so iterate over positions rather than index labels
# (labels such as timestamps are not valid `.iloc` keys) and stop after
# 10 rows; each line is still labelled with the row's index label.
for pos in range(min(10, len(df))):
    df.iloc[pos].plot(label=str(df.index[pos]))
plt.legend()
plt.show()
# Plot a single column.
df['A'].plot()
plt.show()
- 简单的多个图(subplot)使用示例
https://blog.csdn.net/leilei7407/article/details/104969532/
import matplotlib.pyplot as plt
import numpy as np
# Uncomment to make the random bar heights reproducible:
# np.random.seed(0)
x = np.arange(5)
y = np.random.randint(low=-5, high=5, size=5)

# Left panel: vertical bars plus a blue horizontal line at 0.
plt.subplot(1, 2, 1)
plt.bar(x, y, color='blue')
plt.axhline(y=0, color='blue', linewidth=2)

# Right panel: barh draws the same data as horizontal bars,
# plus a red vertical line at 0.
plt.subplot(1, 2, 2)
plt.barh(x, y, color='red')
plt.axvline(x=0, color='red', linewidth=2)

plt.show()
- 简单的多图叠加显示
https://blog.csdn.net/leilei7407/article/details/104969532/
# plt.legend() adds a legend to the figure; here it names the two lines
# in the order they were plotted.
import matplotlib.pyplot as plt

x = list(range(1, 9))
y1 = [1, 2, 3, 4, 55, 6, 6, 7]
y2 = [6, 20, 9, 2, 5, 8, 2, 8]

# Draw both series on the same axes so they overlay each other.
for series in (y1, y2):
    plt.plot(x, series)
plt.legend(['y1', 'y2'])
plt.show()
- 多层索引画图前需要转换
import pandas as pd
# Build a frame with a two-level (multi-level) index.
df = pd.DataFrame(
    np.arange(1, 5).reshape((4, 1)),
    index=[['a', 'b', 'c', 'd'], ['A', 'B', 'C', 'D']],
    columns=['data'],
)
# Optional: give the index levels names.
df = df.rename_axis(index=['index1', 'index2'])
# Flatten to a single-level index ...
df = df.reset_index()
# ... and turn the two columns back into a multi-level index.
df = df.set_index(['index1', 'index2'])
# Move the inner index level ("index2") into the columns, filling the
# missing combinations with 0.
# The result is deliberately NOT named `plt`: that would overwrite the
# `matplotlib.pyplot` alias and break every later `plt.*` call.
pivoted = df.unstack(fill_value=0)['data']
pivoted = pivoted.rename(columns={'A': 'aa', 'B': 'b', 'C': 'c'})
pivoted
- 百分比柱状图
https://chrisalbon.com/python/data_visualization/matplotlib_percentage_stacked_bar_plot/
https://blog.csdn.net/lys_828/article/details/106524459
# 官方1(纵向)
# libraries
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd
# Sample data: three series measured for five groups.
r = [0, 1, 2, 3, 4]
raw_data = {
    'greenBars': [20, 1.5, 7, 10, 5],
    'orangeBars': [5, 15, 5, 10, 15],
    'blueBars': [2, 15, 18, 5, 10],
}
df = pd.DataFrame(raw_data)

# Convert each raw value into a percentage of its group's total.
totals = [green + orange + blue
          for green, orange, blue in zip(df['greenBars'], df['orangeBars'], df['blueBars'])]
greenBars = [value / total * 100 for value, total in zip(df['greenBars'], totals)]
orangeBars = [value / total * 100 for value, total in zip(df['orangeBars'], totals)]
blueBars = [value / total * 100 for value, total in zip(df['blueBars'], totals)]

# Plot settings shared by all three layers.
barWidth = 0.85
names = ('A','B','C','D','E')
_bar_style = {'edgecolor': 'white', 'width': barWidth}

# Bottom layer: green bars start at 0.
plt.bar(r, greenBars, color='#b5ffb9', **_bar_style)
# Middle layer: orange bars sit on top of the green ones.
plt.bar(r, orangeBars, bottom=greenBars, color='#f9bc86', **_bar_style)
# Top layer: blue bars sit on top of green + orange.
_blue_bottom = [green + orange for green, orange in zip(greenBars, orangeBars)]
plt.bar(r, blueBars, bottom=_blue_bottom, color='#a3acff', **_bar_style)

# Label the x axis with the group names.
plt.xticks(r, names)
plt.xlabel("group")

# Show the figure.
plt.show()
# 官方2(横向)
import numpy as np
import matplotlib.pyplot as plt
# Answer-category labels; each list in `results` below has one entry per label.
category_names = ['Strongly disagree', 'Disagree',
'Neither agree nor disagree', 'Agree', 'Strongly agree']
# Sample data: question label -> answer counts, in `category_names` order.
results = {
'Question 1': [10, 15, 17, 32, 26],
'Question 2': [26, 22, 29, 10, 13],
'Question 3': [35, 37, 7, 2, 19],
'Question 4': [32, 11, 9, 15, 33],
'Question 5': [21, 29, 5, 5, 40],
'Question 6': [8, 19, 5, 30, 38]
}
def survey(results, category_names):
    """Draw a horizontal stacked bar chart of raw answer counts.

    Parameters
    ----------
    results : dict
        A mapping from question labels to a list of answers per category.
        It is assumed all lists contain the same number of entries and that
        it matches the length of *category_names*.
    category_names : list of str
        The category labels.

    Returns
    -------
    tuple
        The ``(fig, ax)`` pair created by ``plt.subplots``.
    """
    question_labels = list(results.keys())
    counts = np.array(list(results.values()))
    running_totals = counts.cumsum(axis=1)
    # One colour per category, sampled evenly from the RdYlGn colormap.
    palette = plt.get_cmap('RdYlGn')(np.linspace(0.15, 0.85, counts.shape[1]))

    fig, ax = plt.subplots(figsize=(9.2, 5))
    ax.invert_yaxis()
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, np.sum(counts, axis=1).max())

    for col, (category, rgba) in enumerate(zip(category_names, palette)):
        segment = counts[:, col]
        # A segment starts where the previous categories' running total ends.
        left_edges = running_totals[:, col] - segment
        ax.barh(question_labels, segment, left=left_edges, height=0.5,
                label=category, color=rgba)
        midpoints = left_edges + segment / 2

        # Choose the annotation colour from the segment colour's RGB product.
        red, green, blue, _ = rgba
        label_color = 'white' if red * green * blue < 0.5 else 'darkgrey'
        for row, (x_pos, value) in enumerate(zip(midpoints, segment)):
            ax.text(x_pos, row, str(int(value)), ha='center', va='center',
                    color=label_color)

    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')
    return fig, ax
# Draw the chart for the sample `results` / `category_names` and display it.
survey(results, category_names)
plt.show()
# 修改版本2(纵向)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
def percentage_bar(df):
    """Draw a vertical 100%-stacked bar chart from a DataFrame.

    Every index label of *df* becomes one bar on the x axis and every column
    becomes one coloured segment of that bar. A segment's height is the
    column's value divided by the sum of all columns for that index label.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table. The index labels are used as x tick labels and the
        column names as legend entries.

    Returns
    -------
    tuple
        The ``(fig, ax)`` pair created by ``plt.subplots``.
    """
    labels = df.index.tolist()              # one bar per index label
    results = df.to_dict(orient='list')     # column name -> list of values
    category_names = list(results.keys())   # one stacked segment per column
    data = np.array(list(results.values()))  # shape: (n_columns, n_labels)

    # One colour per column, sampled evenly from the colormap. Swap
    # 'RdYlGn' for another colormap name (e.g. 'hot') to restyle the chart.
    category_colors = plt.get_cmap('RdYlGn')(np.linspace(0.15, 0.85, data.shape[0]))

    # Share of each column within its bar, computed once. Bars whose total
    # is 0 get zero-height segments instead of NaN from a division by zero.
    totals = data.sum(axis=0)
    shares = np.divide(data, totals,
                       out=np.zeros(data.shape, dtype=float),
                       where=totals != 0)

    fig, ax = plt.subplots(figsize=(12, 9))
    ax.invert_xaxis()            # bar order can also be changed by reordering df
    ax.yaxis.set_visible(False)  # hide the y axis
    ax.set_ylim(0, 1)            # shares stack up to 1.0

    bottoms = np.zeros(len(labels))  # running top edge of each bar
    for share, colname, color in zip(shares, category_names, category_colors):
        ax.bar(labels, share, bottom=bottoms, width=0.5, label=colname,
               color=color, edgecolor='gray')
        centers = bottoms + share / 2  # vertical midpoint of each segment
        bottoms = bottoms + share      # the next segment stacks on top

        # Choose the annotation colour from the segment colour's RGB product.
        r, g, b, _ = color
        text_color = 'white' if r * g * b < 0.5 else 'k'
        for x_pos, (y_pos, frac) in enumerate(zip(centers, share)):
            ax.text(x_pos, y_pos, f'{round(frac*100,2)}%', ha='center', va='center',
                    color=text_color, rotation=90)

    # Rotate the x tick labels only after the bars exist. Calling
    # set_xticklabels on the still-empty Axes would attach the labels to
    # the default ticks rather than to the bars.
    ax.tick_params(axis='x', labelrotation=90)
    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='large')
    return fig, ax
# Answer-category labels, used as the DataFrame index below.
category_names = ['Strongly disagree', 'Disagree',
'Neither agree nor disagree', 'Agree', 'Strongly agree']
# Sample data: question label -> answer counts, in `category_names` order.
# NOTE(review): the last 'Question 6' value is 380 here but 38 in the other
# sample data sets in this file; confirm whether that is intentional.
results = {
'Question 1': [10, 15, 17, 32, 26],
'Question 2': [26, 22, 29, 10, 13],
'Question 3': [35, 37, 7, 2, 19],
'Question 4': [32, 11, 9, 15, 33],
'Question 5': [21, 29, 5, 5, 40],
'Question 6': [8, 19, 5, 30, 380]
}
# Rows are the answer categories, columns are the questions.
df = pd.DataFrame(results, index=category_names)
percentage_bar(df)
# Set the x tick label rotation of the current axes to 0 degrees.
plt.xticks(rotation=0)
# 修改版本2(横向)
import numpy as np
import matplotlib.pyplot as plt
# Answer-category labels, used as the DataFrame index below.
category_names = ['Strongly disagree', 'Disagree',
'Neither agree nor disagree', 'Agree', 'Strongly agree']
# Sample data: question label -> answer counts, in `category_names` order.
results = {
'Question 1': [20, 20, 20, 20, 20],
'Question 2': [26, 22, 29, 10, 13],
'Question 3': [35, 37, 7, 2, 19],
'Question 4': [32, 11, 9, 15, 33],
'Question 5': [21, 29, 5, 5, 40],
'Question 6': [8, 19, 5, 30, 38]
}
# Rows are the answer categories, columns are the questions.
df = pd.DataFrame(results, index=category_names)
def survey(df):
    """Draw a horizontal 100%-stacked bar chart from a DataFrame.

    Every column of *df* becomes one horizontal bar and every index label
    becomes one coloured segment of that bar. A segment's width is the
    value divided by the column total, and each segment is annotated with
    its percentage.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table. The column names are used as bar labels and the
        index labels as legend entries.

    Returns
    -------
    tuple
        The ``(fig, ax)`` pair created by ``plt.subplots``.
    """
    category_names = df.index.tolist()
    results = df.to_dict(orient='list')
    labels = list(results.keys())            # one bar per column
    data = np.array(list(results.values()))  # shape: (n_columns, n_index)

    # Convert counts to shares of each bar's total. The totals are computed
    # once, and bars whose total is 0 get zero-width segments instead of NaN
    # from a division by zero.
    row_totals = data.sum(axis=1, keepdims=True)
    has_total = row_totals != 0
    data_cum = np.divide(data.cumsum(axis=1), row_totals,
                         out=np.zeros(data.shape, dtype=float), where=has_total)
    data = np.divide(data, row_totals,
                     out=np.zeros(data.shape, dtype=float), where=has_total)

    # One colour per category, sampled evenly from the RdYlGn colormap.
    category_colors = plt.get_cmap('RdYlGn')(
        np.linspace(0.15, 0.85, data.shape[1]))

    fig, ax = plt.subplots(figsize=(9.2, 5))
    ax.invert_yaxis()            # first column is drawn at the top
    ax.xaxis.set_visible(False)  # hide the x axis
    # Shares run from 0 to 1. The original also set the x-limit to the
    # largest row sum first, but that call was immediately overridden.
    ax.set_xlim(0, 1)

    for i, (colname, color) in enumerate(zip(category_names, category_colors)):
        widths = data[:, i]
        # A segment starts where the previous categories' cumulative share ends.
        starts = data_cum[:, i] - widths
        ax.barh(labels, widths, left=starts, height=0.5,
                label=colname, color=color)
        xcenters = starts + widths / 2

        # Write the percentage inside each segment, choosing the text
        # colour from the segment colour's RGB product.
        r, g, b, _ = color
        text_color = 'white' if r * g * b < 0.5 else 'darkgrey'
        for y, (x, c) in enumerate(zip(xcenters, widths)):
            ax.text(x, y, f'{round(c*100,2)}%', ha='center', va='center',
                    color=text_color)

    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')
    return fig, ax
# Draw the percentage chart for the sample DataFrame and display it.
survey(df)
plt.show()
- 实际应用
import pandas as pd
import numpy as np
import altair as alt
import pymysql
from sqlalchemy import create_engine
import seaborn
import datetime
import matplotlib.pyplot as plt
def survey_percentage(df,T=False):
    """Draw a horizontal 100%-stacked bar chart from a DataFrame.

    Every column of the (optionally transposed) frame becomes one horizontal
    bar and every index label becomes one coloured segment of that bar. A
    segment's width is the value divided by the column total, and each
    segment is annotated with its percentage.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table. The column names are used as bar labels and the
        index labels as legend entries.
    T : bool, optional
        If truthy, transpose *df* before plotting.

    Returns
    -------
    tuple
        The ``(fig, ax)`` pair created by ``plt.subplots``.
    """
    if T:
        df = df.T
    category_names = df.index.tolist()
    results = df.to_dict(orient='list')
    labels = list(results.keys())            # one bar per column
    data = np.array(list(results.values()))  # shape: (n_columns, n_index)

    # Convert counts to shares of each bar's total. The totals are computed
    # once, and bars whose total is 0 get zero-width segments instead of NaN
    # from a division by zero.
    row_totals = data.sum(axis=1, keepdims=True)
    has_total = row_totals != 0
    data_cum = np.divide(data.cumsum(axis=1), row_totals,
                         out=np.zeros(data.shape, dtype=float), where=has_total)
    data = np.divide(data, row_totals,
                     out=np.zeros(data.shape, dtype=float), where=has_total)

    # One colour per category, sampled evenly from the RdYlGn colormap.
    category_colors = plt.get_cmap('RdYlGn')(
        np.linspace(0.15, 0.85, data.shape[1]))

    fig, ax = plt.subplots(figsize=(9.2, 5))
    ax.invert_yaxis()            # first column is drawn at the top
    ax.xaxis.set_visible(False)  # hide the x axis
    # Shares run from 0 to 1. The original also set the x-limit to the
    # largest row sum first, but that call was immediately overridden.
    ax.set_xlim(0, 1)

    for i, (colname, color) in enumerate(zip(category_names, category_colors)):
        widths = data[:, i]
        # A segment starts where the previous categories' cumulative share ends.
        starts = data_cum[:, i] - widths
        ax.barh(labels, widths, left=starts, height=0.5,
                label=colname, color=color)
        xcenters = starts + widths / 2

        # Write the percentage inside each segment, choosing the text
        # colour from the segment colour's RGB product.
        r, g, b, _ = color
        text_color = 'white' if r * g * b < 0.5 else 'darkgrey'
        for y, (x, c) in enumerate(zip(xcenters, widths)):
            ax.text(x, y, f'{round(c*100,2)}%', ha='center', va='center',
                    color=text_color)

    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')
    return fig, ax
def survey(df,T=False):
    """Draw a horizontal stacked bar chart of raw counts from a DataFrame.

    Every column of the (optionally transposed) frame becomes one horizontal
    bar and every index label becomes one coloured segment of that bar. Each
    segment is annotated with its value converted to an integer.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table. The column names are used as bar labels and the
        index labels as legend entries.
    T : bool, optional
        If truthy, transpose *df* before plotting.

    Returns
    -------
    tuple
        The ``(fig, ax)`` pair created by ``plt.subplots``.
    """
    frame = df.T if T else df
    legend_names = frame.index.tolist()
    by_column = frame.to_dict(orient = 'list')
    bar_labels = list(by_column.keys())
    counts = np.array(list(by_column.values()))
    running_totals = counts.cumsum(axis=1)
    # One colour per segment, sampled evenly from the RdYlGn colormap.
    palette = plt.get_cmap('RdYlGn')(np.linspace(0.15, 0.85, counts.shape[1]))

    fig, ax = plt.subplots(figsize=(9.2, 5))
    ax.invert_yaxis()
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, np.sum(counts, axis=1).max())

    for col, (legend_name, rgba) in enumerate(zip(legend_names, palette)):
        segment = counts[:, col]
        # A segment starts where the previous segments' running total ends.
        left_edges = running_totals[:, col] - segment
        ax.barh(bar_labels, segment, left=left_edges, height=0.5,
                label=legend_name, color=rgba)
        midpoints = left_edges + segment / 2

        # Choose the annotation colour from the segment colour's RGB product.
        red, green, blue, _ = rgba
        label_color = 'white' if red * green * blue < 0.5 else 'darkgrey'
        for row, (x_pos, value) in enumerate(zip(midpoints, segment)):
            ax.text(x_pos, row, str(int(value)), ha='center', va='center',
                    color=label_color)

    ax.legend(ncol=len(legend_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')
    return fig, ax
# Read the report rows from the database.
# NOTE(review): the connection URL embeds credentials. These look like
# placeholders; confirm that real credentials are never committed here.
engine = create_engine('mysql+pymysql://root:password@ip:3306/db')
sql = ''' select case_name ,result, platform_name, error_msg ,report_create_time from report_reportdetail; '''
df = pd.read_sql_query(sql, engine)

# Keep only the last two weeks of data, indexed by report creation time.
df = df.set_index(pd.to_datetime(df["report_create_time"])).drop("report_create_time", axis=1)
week_ago = datetime.date.today() - datetime.timedelta(days=14)  # 14 days back, despite the name
df = df.sort_index().truncate(before=week_ago)

# Failure count per error message, most frequent first.
ErrorMsg = df.loc[df['result'] == 'fail', :].groupby(['error_msg']).count().sort_values(by=['result'], ascending=[False])
# select count(result) as i,error_msg from report_reportdetail where result = 'fail' group by error_msg order by i desc ;

# Failed cases: all platforms (df0), HWE only (df1), HWV only (df2).
is_failed = df['result'] == 'fail'
df0 = df.loc[is_failed, :].copy()
df1 = df.loc[is_failed & (df['platform_name'] == 'HWE'), :].copy()
df2 = df.loc[is_failed & (df['platform_name'] == 'HWV'), :].copy()

# The error messages are long, so keep only the first 60 characters.
# df0 must be shortened from its own column: the original took df1's
# messages, which only cover the HWE rows.
df0.loc[:, "error_msg_short"] = df0["error_msg"].str[0:60]
df1.loc[:, "error_msg_short"] = df1["error_msg"].str[0:60]
df2.loc[:, "error_msg_short"] = df2["error_msg"].str[0:60]

# Multi-level index.
df0.set_index(['error_msg_short', 'case_name'], inplace=True)  # 1. with error_msg_short as the first level the ordering stays stable
# df0.set_index(['case_name', 'error_msg_short'], inplace=True)  # 2. with case_name as the first level the ordering gets scrambled
df1.set_index(['case_name', 'error_msg_short'], inplace=True)
df2.set_index(['case_name', 'error_msg_short'], inplace=True)

# Count the rows for each (level 0, level 1) index pair.
df0 = df0.groupby(level=df0.index.names).count()
df1 = df1.groupby(level=df1.index.names).count()
df2 = df2.groupby(level=df2.index.names).count()


def _occurrences(counts, mask):
    """Return the rows of *counts* selected by *mask*, keeping only 'result' renamed to '出现次数'."""
    return counts.loc[mask, ['result']].rename(columns={'result': '出现次数'})


# Analyse by occurrence count.
# HW = _occurrences(df0, df0["result"] > 0).sort_values(by=['出现次数'], ascending=[False])
# ErrorMsgByCase was assigned twice in the original; the second, unsorted
# assignment silently discarded this sorted result, so only this one is kept.
ErrorMsgByCase = _occurrences(df0, df0["result"] > 3).sort_values(by=['出现次数'], ascending=[False])
HWE = _occurrences(df1, df1["result"] > 1)
HWV = _occurrences(df2, df2["result"] > 0)
HWE_ENV = _occurrences(df1, df1["result"] <= 1)
HWV_ENV = _occurrences(df2, df2["result"] <= 3)

# Rows whose short error message contains "PR" followed by six digits and a
# colon. The pattern is a raw string so that `\d` is not treated as an
# invalid string escape.
_PR_PATTERN = r'PR\d{6}:'
HWE_PR = df1.loc[df1.index.get_level_values('error_msg_short').str.contains(_PR_PATTERN, na=False)]
HWV_PR = df2.loc[df2.index.get_level_values('error_msg_short').str.contains(_PR_PATTERN, na=False)]

# ErrorMsg
# ErrorMsgByCase
# HW
HWE

# Pivot the error messages into columns (one row per case) and give the
# known messages short names for the legend.
HWE_plt = HWE.unstack(fill_value=0)['出现次数']
HWE_plt = HWE_plt.rename(columns={' DL Tput is below 400Mbps!':'DL Tput error',
                                  ' UE 5G attach failed!':'attach failed',
                                  ' UL Tput is below 20Mbps!':'UL Tput error'})
HWE_plt.columns  # check these: rename keys must match the column names exactly

survey(HWE_plt, T=True)
plt.show()
# df.loc[['index1','index2'],:]
# HWV
# HWV_PR
# HWE_PR
# HWE_ENV
# data = ErrorMsgByCase.reset_index()
# fg = seaborn.factorplot(y='case_name', x='出现次数',col='error_msg_short', data=data, kind='bar')
# fg.fig.set_size_inches(18,3)
# fg.set_xlabels('')