zoukankan      html  css  js  c++  java
  • Matplotlib 基本图表的绘制

    图表类别:线形图、柱状图、密度图,以横纵坐标两个维度为主

    同时可延展出多种其他图表样式

    Series.plot(kind='line', ax=None, figsize=None, use_index=True, title=None, grid=None, legend=False, style=None, logx=False, logy=False, loglog=False, xticks=None, yticks=None, xlim=None, ylim=None, rot=None, fontsize=None, colormap=None, table=False, yerr=None, xerr=None, label=None, secondary_y=False, **kwds)

    1.Series直接生成图表

    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    % matplotlib inline
    # 导入相关模块
    
    import warnings
    warnings.filterwarnings('ignore') 
    # 不发出警告
    
    # Random-walk time series: 1000 daily values starting 2000-01-01, cumulatively summed
    ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
    ts = ts.cumsum()
    ts.plot(kind='line',
           label = 'hehe',
           style = '--.',   # line style and marker only; the colour comes from `color`
           color = 'red',
           alpha = 0.4,
           use_index = True,
           rot = 45,
           grid = True,
           ylim = [-50,50],
           yticks = list(range(-50,50,10)),
           figsize = (8,4),
           title = 'test',
           legend = True)
    #plt.grid(True, linestyle = "--",color = "gray", linewidth = "0.5",axis = 'x')  # alternative way to draw the grid
    plt.legend()
    # Series.plot(): the Series index is the x axis, the values are the y axis
    # kind → line, bar, barh ... (line chart, bar chart, horizontal bar chart ...)
    # label → legend label; for a DataFrame the column names are used as labels
    # style → matplotlib format string: linestyle (--) and marker (.), optionally a colour letter
    # color → line colour. Do not combine it with a colour letter inside `style`
    #         (e.g. '--g.'): recent pandas versions raise ValueError for that combination
    # alpha → transparency, 0-1
    # use_index → use the index as tick labels, default True
    # rot → rotation of the tick labels, 0-360
    # grid → show the grid; plt.grid is commonly used instead
    # xlim, ylim → x / y axis limits
    # xticks, yticks → x / y tick positions
    # figsize → figure size
    # title → figure title
    # legend → whether to show the legend; plt.legend() is commonly used instead
    # Alternative → plt.plot()

    输出:

    2.Dataframe直接生成图表

    # Plot a DataFrame directly: every column becomes one line
    
    # Four random walks sharing the date index of the `ts` series built earlier
    df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, columns=list('ABCD')).cumsum()
    line_options = {
        'style': '--.',
        'alpha': 0.4,
        'use_index': True,
        'rot': 45,
        'grid': True,
        'figsize': (8,4),
        'title': 'test',
        'legend': True,
        'subplots': False,    # True would draw each column in its own subplot (default False)
        'colormap': 'Greens',
    }
    df.plot.line(**line_options)
    # Alternative → plt.plot(df)

    输出:

    3.柱状图与堆叠图

    # Bar charts and stacked bar charts, one chart per subplot
    
    fig,axes = plt.subplots(4,1,figsize = (10,10))
    s = pd.Series(np.random.randint(0,10,16),index = list('abcdefghijklmnop'))
    df = pd.DataFrame(np.random.rand(10,3), columns=['a','b','c'])
    
    # Single-series bar chart. `ax` chooses the subplot to draw into; the index
    # labels of the data are used as the x tick labels.
    s.plot.bar(ax = axes[0],color = 'k',grid = True,alpha = 0.5)
    
    # Multi-series (grouped) bar chart
    df.plot.bar(ax = axes[1],grid = True,colormap='Reds_r')
    
    # Multi-series stacked bar chart; stacked=True piles the columns on top of each other
    df.plot.bar(ax = axes[2],grid = True,colormap='Blues_r',stacked=True)
    
    # Horizontal stacked bars; equivalent to df.plot.barh(...)
    df.plot(kind = 'barh',ax = axes[3],grid = True,stacked=True,colormap = 'BuGn_r')
    # Both spellings work: .plot(kind='<kind>') and the accessor form .plot.<kind>()

    输出:

    5.柱状图的另一种画法

    # Bar chart drawn with plt.bar(): positive bars above the axis, negative bars below
    
    plt.figure(figsize=(10,4))
    x = np.arange(10)
    y1 = np.random.rand(10)
    y2 = -np.random.rand(10)
    
    plt.bar(x,y1,width = 1,facecolor = 'yellowgreen',edgecolor = 'white',yerr = y1*0.1)
    # Error-bar lengths must be non-negative, so use the magnitude of the negative heights
    plt.bar(x,y2,width = 1,facecolor = 'lightskyblue',edgecolor = 'white',yerr = np.abs(y2)*0.1)
    # x, height: bar positions and bar heights
    # width: bar width
    # facecolor is the fill colour of the bars, edgecolor the colour of their outline
    # bottom: y coordinate of each bar's base → varying bottom turns the chart into a Gantt chart
    # align: 'center' (centre each bar on its x value) or 'edge' (start each bar at its x value)
    # xerr/yerr: error bars in the x / y direction
    
    for i,j in zip(x,y1):
        plt.text(i+0.3,j-0.15,'%.2f' % j, color = 'white')
    for i,j in zip(x,y2):
        plt.text(i+0.3,j+0.05,'%.2f' % -j, color = 'white')
    # Annotate every bar with its (absolute) value
    # zip() pairs up the elements of its iterables and yields them as tuples

    输出:

     6.面积图

    # Area charts: stacked (top) and unstacked (bottom)
    
    fig,axes = plt.subplots(2,1,figsize = (8,6))
    df1 = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df2 = pd.DataFrame(np.random.randn(10, 4), columns=['a', 'b', 'c', 'd'])
    
    # df1 is all-positive, so it can use the default stacked layout
    df1.plot(kind = 'area',colormap = 'Greens_r',alpha = 0.5,ax = axes[0])
    # df2 mixes positive and negative values, so it is drawn unstacked
    df2.plot(kind = 'area',stacked=False,colormap = 'Set2',alpha = 0.5,ax = axes[1])
    # Series.plot.area() and DataFrame.plot.area() are the accessor spellings of the same call
    # stacked: whether to stack the areas; area charts are stacked by default
    # A stacked area chart requires each column to be all positive or all negative
    # NaN values are filled with 0 automatically, so clean missing data beforehand

    输出:

    7.填图

    # Filled plots: fill() shades the polygon traced by a curve, fill_between() the band between two curves
    
    fig,axes = plt.subplots(2,1,figsize = (8,6))
    
    # Top subplot: a damped sine wave and its mirror image
    x = np.linspace(0, 1, 500)
    y1 = np.sin(4 * np.pi * x) * np.exp(-5 * x)
    y2 = -y1
    axes[0].fill(x, y1, 'r',alpha=0.5,label='y1')
    axes[0].fill(x, y2, 'g',alpha=0.5,label='y2')
    # The two calls can also be combined: plt.fill(x, y1, 'r',x, y2, 'g',alpha=0.5)
    
    # Bottom subplot: shade the region between sin(x) and sin(2x)
    x = np.linspace(0, 5 * np.pi, 1000)
    y1 = np.sin(x)
    y2 = np.sin(2 * x)
    axes[1].fill_between(x, y1, y2, color ='b',alpha=0.5,label='area')
    
    # Add a legend and a grid to both subplots
    for i, ax in enumerate(axes):
        ax.legend()
        ax.grid()

    输出:

    8.饼图

    # Pie chart with plt.pie()
    # Signature as quoted by the article (the exact parameter list depends on the matplotlib version):
    # plt.pie(x, explode=None, labels=None, colors=None, autopct=None, pctdistance=0.6, shadow=False, labeldistance=1.1, startangle=None, 
    # radius=None, counterclock=True, wedgeprops=None, textprops=None, center=(0, 0), frame=False, hold=None, data=None)
    
    s = pd.Series(np.random.rand(4) * 3, index=list('abcd'), name='series')
    plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
    pie_options = {
        'explode': [0.1,0,0,0],     # offset of each wedge: 'a' is pulled out by 0.1
        'labels': s.index,          # wedge labels
        'colors': ['r', 'g', 'b', 'c'],
        'autopct': '%.2f%%',        # value label format: percentage with two decimals
        'pctdistance': 0.6,         # distance of the autopct text from the centre, as a fraction of the radius
        'labeldistance': 1.2,       # radial distance at which the labels are drawn (default 1.1)
        'shadow': True,             # draw a shadow beneath the pie
        'startangle': 0,            # angle at which the first wedge starts
        'radius': 1.5,              # radius of the pie
        'frame': False,             # whether to draw the axes frame
    }
    plt.pie(s, **pie_options)
    print(s)
    # First positional argument: the data (wedge sizes)
    # counterclock (not set here): direction in which wedges are laid out, clockwise or counter-clockwise

    输出:

    a    0.744065
    b    2.069706
    c    2.159888
    d    0.642984
    Name: series, dtype: float64

    9.直方图+密度图

    # Histogram with a kernel density estimate drawn on top
    
    s = pd.Series(np.random.randn(1000))
    s.hist(bins = 20,
           histtype = 'bar',
           align = 'mid',
           orientation = 'vertical',
           alpha=0.5,
           density = True)   # replaces the removed `normed` keyword
    # bins: number of bins
    # density: normalise the histogram so that its total area is 1
    # histtype: drawing style, one of bar, barstacked, step, stepfilled
    # orientation: {'horizontal', 'vertical'}
    # align: {'left', 'mid', 'right'}, optional (alignment of the bars on the bin edges)
    
    s.plot(kind='kde',style='k--')
    # Density curve. When it shares an axes with the histogram, the histogram
    # must be normalised (density=True); otherwise the bars show raw counts and
    # the density curve is squashed flat against the x axis.

    输出:

    10.堆叠直方图

    # Stacked histogram of four shifted normal samples
    
    plt.figure(num=1)
    samples = {
        'a': np.random.randn(1000) + 1,
        'b': np.random.randn(1000),
        'c': np.random.randn(1000) - 1,
        'd': np.random.randn(1000)-2,
    }
    df = pd.DataFrame(samples, columns=list('abcd'))
    # DataFrame.plot.hist() / Series.plot.hist() are the accessor spellings of kind='hist'
    # stacked: whether the columns are stacked on top of each other
    df.plot(kind='hist',
            stacked=True,
            bins=20,
            colormap='Greens_r',
            alpha=0.5,
            grid=True)
    
    # DataFrame.hist() draws one separate histogram per column
    df.hist(bins=50)

    输出:

    array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000001F92A9B9940>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000001F92AA016A0>],
           [<matplotlib.axes._subplots.AxesSubplot object at 0x000001F92AA4B8D0>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000001F92AA8A400>]], dtype=object)

    11.散点图

    # Scatter plot with plt.scatter()
    # Signature as quoted by the article (the exact parameter list depends on the matplotlib version):
    # plt.scatter(x, y, s=20, c=None, marker='o', cmap=None, norm=None, vmin=None, vmax=None, 
    # alpha=None, linewidths=None, verts=None, edgecolors=None, hold=None, data=None, **kwargs)
    
    plt.figure(figsize=(8,6))
    x = np.random.randn(1000)
    y = np.random.randn(1000)
    plt.scatter(x,y,marker='.',
               s = np.abs(np.random.randn(1000))*100,  # marker areas must be non-negative
               cmap = 'Reds',
               c = y,
               alpha = 0.8,)
    plt.grid()
    # s: marker size (area in points**2); negative values are invalid, hence np.abs
    # c: marker colour; here the y values, mapped to colours through cmap
    # vmin, vmax: scalars bounding the data range that the colormap covers
    # cmap: colormap

    输出:

    12.散点矩阵

    # Scatter matrix with pd.plotting.scatter_matrix()
    # (the old top-level alias pd.scatter_matrix no longer exists in current pandas)
    # pd.plotting.scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, 
    # grid=False, diagonal='hist', marker='.', density_kwds=None, hist_kwds=None, range_padding=0.05, **kwds)
    
    df = pd.DataFrame(np.random.randn(100,4),columns = ['a','b','c','d'])
    pd.plotting.scatter_matrix(df,figsize=(10,6),
                     marker = 'o',
                     diagonal='kde',
                     alpha = 0.5,
                     range_padding=0.1)
    # diagonal: {'hist', 'kde'}, exactly one of the two → distribution plot of each variable on the diagonal
    # range_padding: (float, optional) padding of the x / y axis ranges; the larger
    #                the value, the more empty space around the plotted data

    输出:

    array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000002A61B496E10>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000002A61C9FD550>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CA45F28>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CA80BE0>],
           [<matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CACAE10>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CB06BA8>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CB4ECF8>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CB5EFD0>],
           [<matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CD5E4E0>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CDA9438>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CDE7240>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CE32C18>],
           [<matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CE6E2E8>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CEBBA58>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CEF9128>,
            <matplotlib.axes._subplots.AxesSubplot object at 0x000002A61CF42278>]],
          dtype=object)

    13.箱型图

    # Box plots drawn through the pandas plot interface (DataFrame.plot.box / kind='box')
    
    fig,axes = plt.subplots(2,1,figsize=(10,6))
    df = pd.DataFrame(np.random.rand(10, 5), columns=list('ABCDE'))
    # Colours of the individual box-plot elements
    color = {
        'boxes': 'DarkGreen',       # outline of the boxes
        'whiskers': 'DarkOrange',   # vertical lines between the box and the caps
        'medians': 'DarkBlue',      # median lines
        'caps': 'Gray',             # horizontal end lines of the whiskers
    }
    
    # Top subplot: vertical boxes; `color` takes the per-element colour mapping
    df.plot(kind = 'box',
            ylim=[0,1.2],
            grid = True,
            color = color,
            ax = axes[0])
    
    # Bottom subplot: horizontal boxes at explicit positions
    df.plot(kind = 'box',
            vert=False,
            positions=[1, 4, 5, 6, 8],
            ax = axes[1],
            grid = True,
            color = color)
    # vert: draw the boxes vertically, default True
    # positions: location of each box along the axis

    输出:

    14.箱型图另一种画法

    # Box plot drawn with DataFrame.boxplot(), then restyled through the returned artists
    # Signature of plt.boxplot() as quoted by the article (the exact parameter list depends on the matplotlib version):
    # plt.boxplot(x, notch=None, sym=None, vert=None, whis=None, positions=None, widths=None, patch_artist=None, bootstrap=None, 
    # usermedians=None, conf_intervals=None, meanline=None, showmeans=None, showcaps=None, showbox=None, showfliers=None, boxprops=None, 
    # labels=None, flierprops=None, medianprops=None, meanprops=None, capprops=None, whiskerprops=None, manage_xticks=True, autorange=False, 
    # zorder=None, hold=None, data=None)
    
    # Create the data and the figure
    df = pd.DataFrame(np.random.rand(10, 5), columns=list('ABCDE'))
    plt.figure(figsize=(10,4))
    
    boxplot_options = {
        'sym': 'o',              # marker used for outliers (any marker code)
        'vert': True,            # vertical boxes
        'whis': 1.5,             # whisker reach in IQR units (default 1.5); a pair such as [5,95] selects percentiles instead
        'patch_artist': True,    # fill the interquartile box
        'meanline': False,       # draw the mean as a line across the box ...
        'showmeans': True,       # ... or, as here, as a marker
        'showbox': True,         # draw the box
        'showcaps': True,        # draw the whisker caps
        'showfliers': True,      # draw the outliers
        'notch': False,          # notched box
        'return_type': 'dict',   # return the artists grouped by element name
    }
    f = df.boxplot(**boxplot_options)
    plt.title('boxplot')
    print(f)
    
    # Boxes are patches: set the outline first, then the fill
    for box in f['boxes']:
        box.set( color='b', linewidth=1)        # outline colour
        box.set( facecolor = 'b' ,alpha=0.5)    # fill colour
    
    # The remaining elements are lines and are styled from a lookup table
    line_styles = {
        'whiskers': dict(color='k', linewidth=0.5,linestyle='-'),   # lines from the box to the caps
        'caps': dict(color='gray', linewidth=2),                     # end lines of the whiskers
        'medians': dict(color='DarkBlue', linewidth=2),              # median lines
        'fliers': dict(marker='o', color='y', alpha=0.5),            # outliers
    }
    for element, props in line_styles.items():
        for artist in f[element]:
            artist.set(**props)
    # The returned dict also holds 'means' (the mean markers), left unchanged here

    输出:

    {'boxes': [<matplotlib.patches.PathPatch object at 0x000002A61CBCBA20>, 
    <matplotlib.patches.PathPatch object at 0x000002A61CBDCA90>, <matplotlib.patches.PathPatch object at 0x000002A61CBF1940>, <matplotlib.patches.PathPatch object at 0x000002A61CC098D0>,
    <matplotlib.patches.PathPatch object at 0x000002A61CC1F860>], 'means': [<matplotlib.lines.Line2D object at 0x000002A61CBD44A8>, <matplotlib.lines.Line2D object at 0x000002A61CBEC390>,
    <matplotlib.lines.Line2D object at 0x000002A61CC03320>, <matplotlib.lines.Line2D object at 0x000002A61CC192B0>, <matplotlib.lines.Line2D object at 0x000002A61CB92438>],
    'medians': [<matplotlib.lines.Line2D object at 0x000002A61CBC3EF0>, <matplotlib.lines.Line2D object at 0x000002A61CBE6B38>, <matplotlib.lines.Line2D object at 0x000002A61CBFEAC8>,
    <matplotlib.lines.Line2D object at 0x000002A61CC14A58>, <matplotlib.lines.Line2D object at 0x000002A61CB96470>], 'caps': [<matplotlib.lines.Line2D object at 0x000002A61CBCEBE0>,
    <matplotlib.lines.Line2D object at 0x000002A61CBCECF8>, <matplotlib.lines.Line2D object at 0x000002A61CBE2AC8>, <matplotlib.lines.Line2D object at 0x000002A61CBE69B0>,
    <matplotlib.lines.Line2D object at 0x000002A61CBF7A58>, <matplotlib.lines.Line2D object at 0x000002A61CBFE940>, <matplotlib.lines.Line2D object at 0x000002A61CC0DF98>,
    <matplotlib.lines.Line2D object at 0x000002A61CC148D0>, <matplotlib.lines.Line2D object at 0x000002A61CB9DF28>, <matplotlib.lines.Line2D object at 0x000002A61CB9D208>],
    'fliers': [<matplotlib.lines.Line2D object at 0x000002A61CBD4B70>, <matplotlib.lines.Line2D object at 0x000002A61CBECB00>, <matplotlib.lines.Line2D object at 0x000002A61CC03A90>,
    <matplotlib.lines.Line2D object at 0x000002A61CC19A20>, <matplotlib.lines.Line2D object at 0x000002A61CC24EB8>], 'whiskers': [<matplotlib.lines.Line2D object at 0x000002A61CBCBE80>,
    <matplotlib.lines.Line2D object at 0x000002A61CBCBFD0>, <matplotlib.lines.Line2D object at 0x000002A61CBDCFD0>, <matplotlib.lines.Line2D object at 0x000002A61CBE2940>,
    <matplotlib.lines.Line2D object at 0x000002A61CBF1F98>, <matplotlib.lines.Line2D object at 0x000002A61CBF78D0>, <matplotlib.lines.Line2D object at 0x000002A61CC09F28>,
    <matplotlib.lines.Line2D object at 0x000002A61CC0D860>, <matplotlib.lines.Line2D object at 0x000002A61CC1FEB8>, <matplotlib.lines.Line2D object at 0x000002A61CC247F0>]}

     

    # Grouped box plots with DataFrame.boxplot(by=...)
    
    df = pd.DataFrame(np.random.rand(10,2), columns=['Col1', 'Col2'] )
    # Two categorical columns to group by
    df['X'] = pd.Series(list('AAAAABBBBB'))
    df['Y'] = pd.Series(list('ABABABABAB'))
    print(df)
    
    # by: the column (or list of columns) whose values define the groups
    df.boxplot(by = 'X')
    # column: the data columns to plot, one subplot per column
    group_keys = ['X','Y']
    df.boxplot(column=['Col1','Col2'], by=group_keys)

    输出:

           Col1      Col2  X  Y
    0  0.661114  0.164637  A  A
    1  0.483369  0.361403  A  B
    2  0.954009  0.786664  A  A
    3  0.173198  0.500602  A  B
    4  0.156583  0.047123  A  A
    5  0.852358  0.672986  B  B
    6  0.823713  0.625156  B  A
    7  0.705710  0.632264  B  B
    8  0.940125  0.091521  B  A
    9  0.230993  0.753328  B  B

    
    
    
    
    
    
  • 相关阅读:
    【Networking】(转)一个非常好的epoll+线程池服务器Demo
    【算法】Logistic regression (逻辑回归) 概述
    【Linux】/dev/null 2>&1 详解
    单点登录与联合登录
    web项目报outmemory错误解决方案
    hadoop学习之HDFS
    ELK日志分析系统
    基于cookie共享的SSO中的遇到的问题
    oracle的隐式游标
    mysql截取字符串与reverse函数
  • 原文地址:https://www.cnblogs.com/carlber/p/9939383.html
Copyright © 2011-2022 走看看