zoukankan      html  css  js  c++  java
  • python 可视化

    摘自:https://www.cnblogs.com/iupoint/p/10893641.html

    1
    2
    3
    4
    5
    import numpy as np
    import pandas as pd
    import matplotlib
    import matplotlib.pyplot as plt
    import seaborn as sns

      matplotlib参数设置

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    matplotlib.rcParams['font.sans-serif'= ['SimHei']
    matplotlib.rcParams['font.family']='sans-serif'
    matplotlib.rcParams['axes.unicode_minus'= False
    #matplotlib.fontsize='15'
     
    #plt.rcParams['figure.figsize'] = (12.0,5.0)  #设置图形大小
     
    #图形内嵌式,notebook模式下(注释不可加在下列命令后)
    %matplotlib inline
    #ipython模式下
    #%pylab inline

      seaborn参数设置

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    # Seaborn controls styling through two groups of functions:
    # axes_style()/set_style() and plotting_context()/set_context().
    # Seaborn has 5 preset themes: darkgrid (default), whitegrid, dark, white, ticks
    # Seaborn has 4 preset contexts: paper, notebook (default), talk, poster
    sns.set_style("whitegrid")
    # The string below is a reference list of other calls; it is not executed.
    '''
    sns.set_context("poster")
    sns.set_style(style=None, rc=None)
    sns.despine(offset=10)  #图与轴线距离
    sns.despine()  #去除刻度和轴线
    sns.set_context(font_scale=1.5)  #字体大小
    sns.set_context(rc={'lines.linewidth':1.5})  #线宽
    sns.set()   #恢复默认值
    '''

      其他参数设置

    1
    2
    3
    4
    5
    6
    # NOTE(review): ax1 and ax are not defined in this snippet; they are presumably
    # Axes objects created elsewhere - confirm before running.
    myfont = matplotlib.font_manager.FontProperties(fname="simsun.ttc")  # custom font file simsun.ttc
    ax1.set_xlabel('时间', fontproperties=myfont, size=18)  # pass the font explicitly so the Chinese label renders
    plt.gcf().set_facecolor(np.ones(3) * 240/255)  # set the figure background colour (light grey)
    plt.gcf().autofmt_xdate()  # auto-format the x-axis date tick labels
    plt.legend(loc=1)  # loc 1, 2, 3, 4 = upper right, upper left, lower left, lower right
    ax.invert_xaxis()  # reverse the direction of the x axis

      线图(1)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    # Data
    x = np.linspace(0, 10, 1000)
    y1 = np.sin(x)
    y2 = np.cos(x)
    y3 = np.cos(x**2)

    plt.figure(1)  # figure number
    plt.subplot(221)
    plt.plot(x, y1, label="$sin(x)$", color="red", linewidth=2)
    plt.plot(x, y2, label="$cos(x)$", color="blue", linewidth=2)

    plt.subplot(222)
    # NOTE(review): the plotted series is y2 = cos(x) but the label says cos(x^2);
    # confirm which one was intended.
    plt.scatter(x[:1000:50], y2[:1000:50], color="blue", label="$cos(x^2)$")

    plt.subplot(212)  # switch to a different grid layout (2 rows x 1 column)
    plt.plot(x, y1+y3, "g-", label="$sin(x)+cos(x^2)$")
    plt.xlabel("time")
    plt.ylabel("value")
    plt.title("$sin(x)+cos(x^2)$ curve")
    plt.xlim(-0.2, 10.2)
    plt.legend()  # show the legend of the current (bottom) subplot

    plt.subplots_adjust(left=0.08, right=0.95, wspace=0.25, hspace=0.45)
    # subplots_adjust is similar to margin handling in CSS: choose values according to
    # the drawing size and the spacing wanted between the subplots
    plt.show()

      线图(2)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    plt.figure(3)
    # NOTE: this is set after plt.figure(3) was created, so it only affects figures created later.
    plt.rcParams['figure.figsize'] = (12, 4)

    plt.subplot(121)
    def sinplot(flip=1):
        """Plot six phase-shifted sine curves with decreasing amplitude.

        flip: factor applied to every curve (e.g. -1 mirrors the curves vertically).
        """
        x = np.linspace(0, 14, 100)
        for i in range(1, 7):
            plt.plot(x, np.sin(x + i*0.5) * (7 - i) * flip)
    sinplot()

    plt.subplot(122)
    x = np.arange(0, 2*np.pi, 0.02)
    y = np.sin(x)
    y1 = np.sin(2*x)
    y2 = np.sin(3*x)
    # Mask the samples that should not be drawn
    ym1 = np.ma.masked_where(y1 > 0.5, y1)
    ym2 = np.ma.masked_where(y2 < -0.5, y2)
    # Plot the three series; the 'o' format applies to the last one
    lines = plt.plot(x, y, x, ym1, x, ym2, 'o')
    # Set the line properties
    plt.setp(lines[0], linewidth=1)
    plt.setp(lines[1], linewidth=2)
    plt.setp(lines[2], linestyle='-', marker='^', markersize=2)
    # Legend labels
    plt.legend(('No mask', 'Masked if > 0.5', 'Masked if < -0.5'), loc='upper right')
    plt.title('Masked line demo')
    plt.show()

      条形图+饼图+直方图+阶梯图

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    plt.figure(2)
    # Data
    np.random.seed(sum(map(ord, "aesthetics")))
    d1 = {'A': 5, 'B': 7, 'C': 3}
    d2 = np.random.randn(1000)
    # Materialise the dict views: plotting functions expect real sequences
    d1_keys = list(d1.keys())
    d1_values = list(d1.values())

    # Bar chart
    plt.subplot(221)
    plt.bar(d1_keys, d1_values, align='center')  #,alpha=.7,color='g'
    #plt.bar(range(3),d1.values(),align='center')
    #plt.xticks(range(3),xticks)
    plt.ylabel("Frequency")
    plt.title("Numbers of Books Students Read")

    # Pie chart
    plt.subplot(222)
    plt.pie(d1_values, labels=d1_keys, autopct='%1.1f%%')
    plt.title("Number of Books Students Read")

    # Histogram
    plt.subplot(223)
    plt.hist(d2, 100)
    plt.xlabel('Heights')
    plt.ylabel('Frequency')
    plt.title('Height of Students')

    # Step curve / cumulative distribution curve
    plt.subplot(224)
    # `density` replaces the `normed` keyword, which newer matplotlib releases no longer accept
    plt.hist(d2, 20, density=True, histtype='step', cumulative=True)
    plt.xlabel('Heights')
    plt.ylabel('Frequency')
    plt.title('Heights of Students')

    plt.subplots_adjust(left=0.08, right=0.95, wspace=0.25, hspace=0.45)  # spacing between subplots
    plt.show()

       饼图+箱线图

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    plt.figure(2)
    plt.subplot(121)  #fig, ax
    animals = {'frogs': 15, 'hogs': 20, 'dogs': 45, 'cats': 10}
    # Materialise the dict views: plotting functions expect real sequences
    animal_names = list(animals.keys())
    animal_counts = list(animals.values())
    colors = 'yellowgreen', 'gold', 'lightskyblue', 'lightcoral'
    explode = 0, 0.1, 0, 0  # pull the second wedge ('hogs') out of the pie
    plt.pie(animal_counts, explode=explode, labels=animal_names,
            colors=colors, autopct='%1.1f%%', shadow=True, startangle=50)  #ax.pie
    #ax.set(aspect="equal", title='Pie plot with animals')
    plt.axis('equal')

    plt.subplot(122)
    plt.boxplot(animal_counts, labels=['animals'])
    #plt.boxplot((x,y,z),labels=('x','y','z')) # horizontal: vert=False, whis=1.5
    #df.boxplot()
    plt.title('Heights of Students')
    plt.show()

      雷达图 + 圆环图 

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    plt.figure(figsize=(12,4), facecolor="white")
    # Data
    labels = np.array(['综合', '第一周', '第二周', '第三周', '第四周', '第五周'])  # axis labels
    nAttr = 6  # number of data points
    values = np.array([88.7, 85, 90, 95, 70, 96])  # raw data
    angles = np.linspace(0, 2*np.pi, nAttr, endpoint=False)  # angles in radians
    # Append the first point at the end so the polygon is closed
    values = np.concatenate((values, [values[0]]))
    angles = np.concatenate((angles, [angles[0]]))
    # Plot
    plt.subplot(121, polar=True)  # polar coordinate system
    plt.plot(angles, values, 'o-', color='g', linewidth=2)  # outline
    plt.fill(angles, values, facecolor='g', alpha=0.2)  # filled area
    # Drop the duplicated closing angle so there is exactly one tick per label
    plt.thetagrids(angles[:-1]*180/np.pi, labels)  # axis labels
    #plt.figtext(0.52, 0.95, 'python成绩分析图', ha='center')  # title
    plt.title('python成绩分析图')
    plt.grid(True)
    #plt.savefig('dota_radar.JPG')

    plt.subplot(122)
    #fig, ax = plt.subplots()
    vals1 = [1, 2, 3, 4]
    vals2 = [2, 3, 4, 5]
    vals3 = [1]
    labels = 'A', 'B', 'C', 'D'
    plt.pie(vals1, radius=1.2, autopct='%1.1f%%', pctdistance=0.9)
    plt.pie(vals2, radius=1, autopct='%1.1f%%', pctdistance=0.75)
    plt.pie(vals3, radius=0.6, colors='w')  # white disc in the middle creates the ring look
    #ax.set(aspect="equal", title='Pie plot with `ax.pie`')
    plt.title('Pie plot with xx')
    plt.legend(labels, loc='best')  #bbox_to_anchor=(1, 1), loc='best', borderaxespad=0.
    plt.show()

      散点图+直方图

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    plt.figure(figsize=(12,4))

    # Scatter plot
    plt.subplot(121)
    import matplotlib.cm as cm
    def scatter_plot_by_category(feat, x, y):
        """Scatter column `x` against column `y` of the global `df`, one colour per `feat` group.

        NOTE(review): `df` is not defined in this snippet; presumably it is the iris
        DataFrame with a 'target' column built elsewhere on this page - confirm.
        """
        gs = df.groupby(feat)
        cs = cm.rainbow(np.linspace(0, 1, len(gs)))  # one colour per group
        for g, c in zip(gs, cs):
            # g is a (group key, sub-DataFrame) pair
            plt.scatter(g[1][x], g[1][y], color=c, alpha=0.5)
    scatter_plot_by_category('target', 'sepal length (cm)', 'sepal width (cm)')
    plt.xlabel('sepal length (cm)')
    plt.ylabel('sepal width (cm)')
    plt.title('target')

    # Histogram
    plt.subplot(122)
    from matplotlib import mlab
    mu, sigma = 100, 15
    x = mu + sigma * np.random.randn(10000)
    x1 = np.linspace(x.min(), x.max(), 1000)
    # Normal density curve, computed directly (mlab.normpdf no longer exists in current matplotlib)
    normal = np.exp(-(x1 - mu)**2 / (2 * sigma**2)) / (sigma * np.sqrt(2 * np.pi))
    kde = mlab.GaussianKDE(x)  # kernel density estimate of the sample

    #color='steelblue'
    #bins=np.arange(x.min(),x.max(), 5)
    #density=True,    # frequency (normalised) histogram
    #cumulative=True, # cumulative histogram
    n, bins, patches = plt.hist(x, bins=50, density=1, edgecolor='k', facecolor='g', alpha=0.75)  # edge colour + fill colour

    line1, = plt.plot(x1, normal, 'r-', linewidth=2)
    line2, = plt.plot(x1, kde(x1), 'g-', linewidth=2)

    plt.legend([line1, line2], ['正态曲线', '核密度曲线'], loc='best')
    plt.tick_params(top=False, right=False)  # remove the ticks on the top and right borders
    plt.axvline(90)   # reference line
    plt.text(60, .025, r'$\mu=100, \sigma=15$')  # annotation text
    plt.axis([40, 160, 0, 0.03])  # axis limits
    plt.grid(ls='--')
    plt.xlabel('Smarts')
    plt.ylabel('Probability')
    plt.title('Histogram of IQ')

    plt.show()

      seaborn.barplot绘制柱状图    更多:Seaborn常见绘图总结

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    import numpy as np
    import seaborn as sns
    import matplotlib.pyplot as plt
     
    plt.figure(figsize=(12, 4))

    # Build a 10x4 demo frame, then overwrite 'a' with repeated x positions
    # and 'd' with a two-level category used as the hue.
    a = np.arange(40).reshape(10, 4)
    df = pd.DataFrame(a, columns=['a', 'b', 'c', 'd'])
    df['a'] = [0, 4, 4, 8, 8, 8, 4, 12, 12, 12]
    df['d'] = list('aabbabbbab')

    # Left panel: grouped bar chart split by the category column 'd'
    plt.subplot(121)
    sns.barplot(x='a', y='b', data=df, hue='d')

    # Right panel: 'c' stacked on top of 'b' with plain matplotlib bars
    plt.subplot(122)
    positions, lower, upper = df['a'], df['b'], df['c']
    plt.bar(positions, lower, label='b')
    #barh(x,y)
    plt.bar(positions, upper, bottom=lower, color='r', label='c')
    plt.legend(loc=2)
    plt.show()

      并列柱状图

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    # NOTE(review): data1 is not defined in this snippet; presumably a DataFrame with
    # one row per hospital tier - confirm against the caller.
    bar_width = 0.3
    x = np.arange(3)  # one group position per hospital tier
    tick_label = ['一级医院', '二级医院', '三级医院']
    plt.figure(figsize=(12,4))
    plt.subplot(121)
    #data1.groupby('医院等级').sum()[['医院数','本地定点医院数']].plot(kind="bar",width = .8)  #.unstack()
    #data1[['医院数','本地定点医院数']].plot(kind="bar",width = .8)
    plt.bar(x, data1['医院数'], width=bar_width, align="center", color="c", label="全部医院", alpha=0.5)
    # The second series is shifted by one bar width so the bars sit side by side
    plt.bar(x+bar_width, data1['本地定点医院数'], width=bar_width, align="center", color="b", label="本地定点医院", alpha=0.5)
    plt.xticks(x+bar_width/2, tick_label)  # centre the tick between the two bars
    plt.legend()
    plt.title('舟山市居民就医医院的等级分布')

    #plt.title('医院数分布')
    plt.subplot(122)
    plt.bar(x, data1['总单号数'], width=bar_width, align="center", color="c", label="全部医院", alpha=0.5)
    plt.bar(x+bar_width, data1['本地定点医院单号量'], width=bar_width, align="center", color="b", label="本地定点医院", alpha=0.5)
    plt.xticks(x+bar_width/2, tick_label)
    plt.legend()
    plt.title('舟山市居民在各等级医院就医的单号量分布')
    plt.show()

      堆积图

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    # NOTE(review): df, x and n are not defined in this snippet; presumably df holds one
    # column per hospital tier, n is the number of rows to draw and x the bar positions.
    # Normalise every row so the columns of a row sum to 1 (100% stacked chart)
    total = df.sum(axis=1)
    for i in df.columns:
        df[i] = df[i] / total

    # Draw each column on top of the running total of the previous ones
    bottom = 0
    for i in range(df.shape[1]):
        y = df.iloc[:n, i]
        plt.bar(x, y, bottom=bottom)
        bottom += y
    plt.legend(['一级医院', '二级医院', '三级医院'])
    plt.title('100种常见病在不同医院等级下的单号量分布图')

      柱状折线图 / 双轴图(增速要乘100的哦)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    df = pd.DataFrame({'x': list('abcd'), 'y': [20, 15, 10, 8], 'r': [0.3, 0.5, 0.4, 0.1]})

    #plt.rcParams['figure.figsize'] = (12.0,5.0)
    fig = plt.figure(figsize=(8,4))

    # Draw the bars on the primary axis
    ax1 = fig.add_subplot(111)
    ax1.bar(df['x'], df['y'], alpha=.7, color='g')

    ax1.set_ylabel('xx收入', fontsize=12)
    plt.xticks(range(df.shape[0]), df['x'])
    plt.xticks(fontsize=10)  # original author's note: cannot be set later on
    plt.yticks(fontsize=10)

    # Draw the line on a secondary y axis that shares the x axis
    ax2 = ax1.twinx()
    ax2.plot(df['x'], df['r'], 'r', marker='*', ms=10)

    ax2.set_ylim([0, 0.6])
    ax2.set_ylabel('同比增速(%)', fontsize=12)
    plt.yticks(fontsize=10)

    #ax1.set_xticklabels('defg', rotation=-45)  # rotated tick labels
    plt.title('近年xx公司xx收入与同比增速', fontsize=16)
    plt.grid(False)

    # Add data labels (drawn on the current axes, i.e. the secondary axis)
    for i in range(df.shape[0]):
        #plt.text(i, df['y'][i]+0.3, str(df['y'][i]), ha='center', va='bottom', fontsize=15, rotation=0)
        plt.text(i, df['r'][i], str(df['r'][i]), ha='center', va='bottom', fontsize=12, rotation=0)

    # Save and show
    # dpi is the image resolution, bbox_inches='tight' trims the surrounding whitespace
    #plt.savefig('e:/tj/month/fx1806/公司保费增速与同比.png', dpi=600, bbox_inches='tight')
    plt.show()

      柱状折线图 -- 合并label

    1
    2
    3
    4
    5
    6
    7
    8
    9
    # NOTE(review): data and ind are not defined in this snippet; ind is presumably a
    # boolean row mask over data - confirm against the caller.
    fig = plt.figure(figsize=(10, 4))
    ax1 = fig.add_subplot(111)
    lns1 = ax1.bar(range(ind.sum()), data.loc[ind,'单号数'], alpha=.7, color='b', label=r'单号数')
    ax2 = ax1.twinx()
    lns2 = ax2.plot(range(ind.sum()), data.loc[ind,'用药(包含检查等)种类数'], color='r', marker='*', ms=4, linewidth=1, label=r'用药(包含检查等)种类数')
    # ax.bar returns one container while ax.plot returns a list of lines;
    # merge them so a single legend covers both axes
    lns = [lns1] + lns2
    labs = [handle.get_label() for handle in lns]
    ax1.legend(lns, labs, loc=0)
    plt.show()

      其他条形图

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    plt.figure(figsize=(10, 3))
    # Overlapping bar chart
    plt.subplot(121)
    data_hour2015 = pd.DataFrame(np.random.randint(10, size=(100,)), columns=['num'])
    data_hour2016 = pd.DataFrame(np.random.randint(10, size=(100,)), columns=['num'])
    data_hour2017 = pd.DataFrame(-np.random.randint(10, size=(100,)), columns=['num'])  # negated, so drawn below zero
    data_hour2015['num'].plot.bar(color='g', alpha=0.6, label='2015年')
    data_hour2016['num'].plot.bar(color='r', alpha=0.6, label='2016年')
    data_hour2017['num'].plot.bar(color='b', alpha=0.6, label='2017年')
    #plt.ylabel('counts')
    #plt.title('missing')
    plt.legend(loc='upper right')
    plt.xticks([0,19,39,59,79,99], [1,20,40,60,80,100])

    # Two-dimensional frequency distribution
    plt.subplot(122)
    x = np.random.randn(1000) + 2
    y = np.random.randn(1000) + 3
    plt.hist2d(x, y, bins=40)
    plt.show()

      自定义图例  参考

    注意:数据点过多会导致部分bar显示不全的情况

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatches
     
    # NOTE(review): data is not defined in this snippet; presumably a DataFrame with
    # 'hirate' (hospital tier 1-3), 'hicode' and 'counts' columns - confirm.
    colors = ['red', 'green', 'blue']
    labels = ['一级医院', '二级医院', '三级医院']
    # Map every row's tier (1-based) to its colour
    c_map = data['hirate'].map(lambda x: colors[int(x)-1]).tolist()

    plt.figure(figsize=(8,4))
    plt.bar(range(len(data['hicode'])), data['counts'], color=c_map)  #width=0.5
    #plt.ylim(-0.01, 5000000)
    # Custom ticks: label every 100th bar
    plt.xticks(ticks=np.arange(7)*100, labels=data['hicode'][np.arange(7)*100])
    # Custom legend: one coloured patch per tier
    patches = [mpatches.Patch(color=colors[i], label="{:s}".format(labels[i])) for i in range(len(colors))]
    ax = plt.gca()
    #box = ax.get_position()
    #ax.set_position([box.x0, box.y0, box.width , box.height* 0.8])
    ax.legend(handles=patches, loc=0)  # bbox_to_anchor=(0.95,1.12) sets the position, ncol=1 the column count
    plt.title('医院编码 - 接诊单号量分布图')
    plt.show()

      并列条形图 -- 参考链接

    1
    # Sum per (Region, Tier) group, keep the two sales columns, move the inner
    # index level to the columns and draw the result as a bar chart.
    (
        df.groupby(['Region', 'Tier'], sort=True)
        .sum()[['Sales2015', 'Sales2016']]
        .unstack()
        .plot(kind="bar", width=.8)
    )

      DataFrame数据绘图

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    # Bar chart
    speed = [0.1, 17.5, 40, 48, 52, 69, 88]
    lifespan = [2, 8, 70, 1.5, 25, 12, 28]
    index = ['snail', 'pig', 'elephant', 'rabbit', 'giraffe', 'coyote', 'horse']
    df = pd.DataFrame({'speed': speed, 'lifespan': lifespan}, index=index)
    ax = df.plot.barh(x='lifespan')
    #df.plot.bar()


    # Histogram
    df = pd.DataFrame(np.random.randint(1, 7, 6000), columns=['one'])
    df['two'] = df['one'] + np.random.randint(1, 7, 6000)
    ax = df.plot.hist(bins=12, alpha=0.5)


    # Box plot
    data = np.random.randn(25, 4)
    df = pd.DataFrame(data, columns=list('ABCD'))
    ax = df.plot.box()

    # Hexagonal binning heat map
    n = 10000
    df = pd.DataFrame({'x': np.random.randn(n), 'y': np.random.randn(n)})
    ax = df.plot.hexbin(x='x', y='y', gridsize=20)

    n = 500
    df = pd.DataFrame({'coord_x': np.random.uniform(-3, 3, size=n),
                       'coord_y': np.random.uniform(30, 50, size=n),
                       'observations': np.random.randint(1, 5, size=n)})
    # Colour each hexagon by the sum of the observations that fall into it
    ax = df.plot.hexbin(x='coord_x',
                        y='coord_y',
                        C='observations',
                        reduce_C_function=np.sum,
                        gridsize=10,
                        cmap="viridis")

    # Kernel density estimate
    df = pd.DataFrame({'x': [1, 2, 2.5, 3, 3.5, 4, 5],
                       'y': [4, 4, 4.5, 5, 5.5, 6, 6],})
    ax = df.plot.kde()
    ax = df.plot.kde(bw_method=0.3)
    ax = df.plot.kde(bw_method=3)
    ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])

    # Line chart
    df = pd.DataFrame({'pig': [20, 18, 489, 675, 1776],
                       'horse': [4, 25, 281, 600, 1900]},
                      index=[1990, 1997, 2003, 2009, 2014])
    lines = df.plot.line()
    axes = df.plot.line(subplots=True)
    lines = df.plot.line(x='pig', y='horse')

    # Pie chart
    df = pd.DataFrame({'mass': [0.330, 4.87, 5.97],
                       'radius': [2439.7, 6051.8, 6378.1]},
                      index=['Mercury', 'Venus', 'Earth'])
    ax = df.plot.pie(y='mass', subplots=True, figsize=(6, 3))
    ax = df.plot.pie(y='radius', subplots=True, figsize=(6, 3))

    # Scatter plot
    df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
                       [6.4, 3.2, 1], [5.9, 3.0, 2]],
                      columns=['length', 'width', 'species'])
    ax1 = df.plot.scatter(x='length',
                          y='width',
                          c='DarkBlue')
    # Colour the points by the numeric 'species' column through a colormap
    ax2 = df.plot.scatter(x='length',
                          y='width',
                          c='species',
                          colormap='viridis')

      

       矩阵图

    1
    2
    3
    4
    5
    6
    7
    8
    import pandas as pd
    x = pd.DataFrame(np.random.randn(200,4)*100, columns=['A','B','C','D'])
    cs = np.random.randint(3, size=200)  # random class id (0-2) per row, used as the point colour
    #c='k',cmap=mglearn.cm3
    # scatter_matrix lives in pandas.plotting; the top-level pd.scatter_matrix alias was removed
    pd.plotting.scatter_matrix(x, figsize=(8,8), c=cs, marker='+',
                               diagonal='hist', hist_kwds={'bins': 10, 'edgecolor': 'k'},
                               alpha=0.8, range_padding=0.1)
    plt.show()

      热力图

    1
    2
    3
    4
    5
    6
    #corr = df.corr()
    flights = sns.load_dataset("flights")
    # Reshape to a month x year table of passenger counts (keyword arguments are
    # required by recent pandas versions)
    flights = flights.pivot(index="month", columns="year", values="passengers")
    fig, ax = plt.subplots(figsize=(6, 4.5))
    sns.heatmap(flights, annot=True, fmt="d", linewidths=.5, ax=ax)  #cmap='RdBu'
    plt.show()

      violinplot图

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    from sklearn.datasets import load_iris
    iris = load_iris()
    df = pd.DataFrame(iris['data'], columns=iris['feature_names'])
    df['target'] = iris['target']
    plt.figure(figsize=(9, 8))
    # One violin plot per feature column, laid out on a 2x2 grid
    for column_index, column in enumerate(df.columns):
        if column == 'target':
            continue
        plt.subplot(2, 2, column_index + 1)
        sns.violinplot(x='target', y=column, data=df)

      数学教科书上展示的图

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    plt.figure(1)
    x = np.linspace(-np.pi, np.pi, 256, endpoint=True)
    co, si = np.cos(x), np.sin(x)

    plt.plot(x, co, color="blue", linewidth=1.0, linestyle="-", label="cos", alpha=0.5)
    plt.plot(x, si, "r*", markersize=1, label="sin")

    # Get the current axes so they can be customised
    ax = plt.gca()
    # Hide the right and top spines, move the left and bottom spines to the data
    # origin, and keep the ticks on the bottom and left
    ax.spines['right'].set_color("none")
    ax.spines['top'].set_color("none")
    ax.spines['left'].set_position(("data", 0))
    ax.spines['bottom'].set_position(("data", 0))
    ax.xaxis.set_ticks_position("bottom")
    ax.yaxis.set_ticks_position("left")
    # Set the ticks and the tick label format
    plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi], [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$\pi/2$', r'$\pi$'])
    plt.yticks(np.linspace(-1, 1, 5, endpoint=True))
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_fontsize(10)  # font size
        label.set_bbox(dict(facecolor="white", edgecolor="None", alpha=0.2))

    # Colour fill; the positional arguments are x, y1, y2, where
    plt.fill_between(x, np.abs(x)<0.5, co, co>0.5, color="red", alpha=0.2)

    # Annotations:
    # xy is the annotated point, xycoords="data" means it is given in data coordinates
    # xytext is the text position, textcoords selects its coordinate system (here an offset)
    # arrowprops configures the arrow (a dict): arrowstyle is the arrow style,
    # connectionstyle the connection style
    t = 1
    # The colour is given once via the keyword (no extra format string needed)
    plt.plot([t,t], [0,np.cos(t)], color='yellow', linewidth=2, linestyle="--")
    plt.scatter([t,t], [0,np.cos(t)], 50, color='red')
    plt.annotate("cos(1)", xy=(t, np.cos(t)), xycoords="data",
                 xytext=(+10, +20), textcoords="offset points", fontsize=12,
                 arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2"))

    t = 2*np.pi/3
    plt.plot([t,t], [0,np.sin(t)], color='yellow', linewidth=2, linestyle="--")
    plt.scatter([t,t], [0,np.sin(t)], 50, color='green')
    plt.annotate(r'$\sin(\frac{2\pi}{3})=\frac{\sqrt{3}}{2}$', xy=(t, np.sin(t)), xycoords='data',
                 xytext=(+10, +30), textcoords='offset points', fontsize=12,
                 arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2"))

    plt.title("cos&sin")
    plt.legend(loc="upper left")
    plt.grid(ls='--')
    plt.axis([-3.15, 3.15, -1.05, 1.05])

    plt.show()

      插值图

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from scipy.interpolate import griddata
     
    def func(x, y):
        """Evaluate the 2-D test surface x(1-x)cos(4*pi*x) * sin(4*pi*y^2)^2.

        Works element-wise on scalars or NumPy arrays.
        """
        four_pi = 4*np.pi
        along_x = x*(1 - x)*np.cos(four_pi*x)
        along_y = np.sin(four_pi*y**2)**2
        return along_x * along_y
         
    # 1000 random sample points in the unit square and the function value at each
    points = np.random.rand(1000, 2)
    values = func(points[:,0], points[:,1])
    # Regular 100 x 200 grid on [0, 1] x [0, 1] to interpolate onto
    grid_x, grid_y = np.mgrid[0:1:100j, 0:1:200j]

    grid_z0 = griddata(points, values, (grid_x, grid_y), method='nearest')
    grid_z1 = griddata(points, values, (grid_x, grid_y), method='linear')
    grid_z2 = griddata(points, values, (grid_x, grid_y), method='cubic')

    plt.subplot(221)
    plt.imshow(func(grid_x, grid_y).T, extent=(0,1,0,1), origin='lower')
    plt.plot(points[:,0], points[:,1], 'k.', ms=1)  # overlay the sample points
    plt.title('Original')
    plt.subplot(222)
    plt.imshow(grid_z0.T, extent=(0,1,0,1), origin='lower')
    plt.title('Nearest')
    plt.subplot(223)
    plt.imshow(grid_z1.T, extent=(0,1,0,1), origin='lower')
    plt.title('Linear')
    plt.subplot(224)
    plt.imshow(grid_z2.T, extent=(0,1,0,1), origin='lower')
    plt.title('Cubic')
    plt.gcf().set_size_inches(6, 6)
    plt.show()

      等高线图 

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    import numpy as np
    import matplotlib.pyplot as plt
    #import matplotlib as mpl
    #from matplotlib import colors
     
    # Sample the range with a step of 0.01, i.e. one point every 0.01
    step = 0.01
    x = np.arange(-10, 10, step)
    y = np.arange(-10, 10, step)
    # x = np.linspace(-10,10,100) would also work: 100 points from -10 to 10

    # Turn the 1-D coordinates into grid form
    X, Y = np.meshgrid(x, y)
    Z = X**2 + Y**2

    # Contour plots
    plt.figure(figsize=(10,6))  # set the canvas size
    plt.subplot(231)
    plt.contour(X, Y, Z)  # contour lines

    plt.subplot(232)
    contour = plt.contour(X, Y, Z, [20,40,60], colors='k')  # only draw the lines z=20, 40 and 60, in black
    plt.clabel(contour, fontsize=10, colors=('k','r','b'), fmt='%.4f')  # label the levels (font size, colours, decimals)

    plt.subplot(233)
    contour = plt.contour(X, Y, Z, 4, colors='k')  # let matplotlib pick about 4 levels, in black
    plt.clabel(contour, fontsize=10, colors='b', fmt='%.2f')  # label the levels (font size, colour, decimals)

    plt.subplot(234)
    plt.contourf(X, Y, Z)  # filled contours, f means filled
    plt.xticks(())  # remove the ticks
    plt.yticks(())

    plt.subplot(235)
    cset = plt.contourf(X, Y, Z, 6, cmap=plt.cm.hot)
    plt.colorbar(cset)

    plt.subplot(236)
    cset = plt.contourf(X, Y, Z, 6, alpha=1, vmin=0, vmax=100, cmap='hot_r')  # about 6 levels, reversed colormap
    plt.colorbar(cset)
    contour = plt.contour(X, Y, Z, 8, colors='k')  # about 8 contour lines
    plt.clabel(contour, fontsize=10, colors='k')
    plt.scatter(0, 0, color='r')
    plt.show()
     
    #colorslist = ['w','gainsboro','gray','aqua']
    #将颜色条命名为mylist,一共插值颜色条50个
    #cmaps = colors.LinearSegmentedColormap.from_list('mylist',colorslist,N=200)
    #cmap='hot' 'BuGn', plt.get_cmap('YlOrBr_r'), mpl.cm.hot

       聚类结果的可视化(1)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    from itertools import cycle
    import matplotlib.pyplot as plt
     
    # NOTE(review): db, X and n_clusters_ are not defined in this snippet; the attributes
    # read from db (labels_, core_sample_indices_) suggest a fitted DBSCAN-like estimator
    # and X its sample array - confirm.
    plt.close('all')
    plt.figure(figsize=(12,4))
    plt.clf()

    unique_labels = set(db.labels_)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)  # all-False vector, one entry per sample
    core_samples_mask[db.core_sample_indices_] = True  # mark the core samples

    # Draw the outliers in black
    plt.subplot(121)
    colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
    for k, col in zip(unique_labels, colors):
        if k == -1:  # samples labelled -1 are outliers
            # Black (RGBA) for outliers
            col = [0, 0, 0, 1]

        class_member_mask = (db.labels_ == k)  # True for every sample of this cluster

        xy = X[class_member_mask & core_samples_mask]  # core samples of the cluster: large markers
        plt.plot(xy[:, 0], xy[:, 2], 'o', markerfacecolor=tuple(col), markeredgecolor='k', markersize=14)

        xy = X[class_member_mask & ~core_samples_mask]  # non-core samples of the cluster: small markers
        plt.plot(xy[:, 0], xy[:, 2], 'o', markerfacecolor=tuple(col), markeredgecolor='k', markersize=6)

    plt.title('对医院医疗耗材的异常值检测最佳聚类数: %d' % n_clusters_)
    plt.xlabel(r'CQ类材料使用频率(%)')
    plt.ylabel(r'单价200元以上CL类使用频率(%)')
    #plt.show()


    plt.subplot(122)
    colors = cycle('bgrcmybgrcmybgrcmybgrcmy')
    for k, col in zip(unique_labels, colors):
        class_member_mask = db.labels_ == k
        if k == -1:
            # Outliers: black dots, no centre
            plt.plot(X[class_member_mask, 0], X[class_member_mask, 2], 'k' + '.')
        else:
            # Centre = mean of the cluster's core samples
            cluster_center = X[class_member_mask & core_samples_mask].mean(axis=0)
            plt.plot(X[class_member_mask, 0], X[class_member_mask, 2], col + '.')
            plt.plot(cluster_center[0], cluster_center[2], 'o', markerfacecolor=col,
                     markeredgecolor='k', markersize=14)
            # Connect every member of the cluster to the centre
            for x in X[class_member_mask]:
                plt.plot([cluster_center[0], x[0]], [cluster_center[2], x[2]], col)

    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.xlabel(r'CQ类材料使用频率(%)')
    plt.ylabel(r'单价200元以上CL类使用频率(%)')
    plt.show()

       聚类结果的可视化(2) 

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    # Print the module docstring (prints None when the module has no docstring).
    print(__doc__)
     
    from time import time
    import numpy as np
    import matplotlib.pyplot as plt
     
    from sklearn import metrics
    from sklearn.cluster import KMeans
    from sklearn.datasets import load_digits
    from sklearn.decomposition import PCA
    from sklearn.preprocessing import scale
     
    # Fix the NumPy seed so the runs are repeatable
    np.random.seed(42)

    # Load the digits dataset and standardise its features
    digits = load_digits()
    data = scale(digits.data)

    labels = digits.target
    n_samples = data.shape[0]
    n_features = data.shape[1]
    n_digits = np.unique(labels).size  # number of distinct target classes

    # Sample size passed to the silhouette score in bench_k_means
    sample_size = 300

    print("n_digits: %d, n_samples %d, n_features %d"
          % (n_digits, n_samples, n_features))


    # Header of the benchmark table
    print('_' * 82)
    print('init time inertia homo compl v-meas ARI AMI silhouette')
     
     
    def bench_k_means(estimator, name, data):
        """Fit `estimator` on `data` and print one row of clustering metrics.

        The row holds `name`, the fit time, the estimator's inertia and six scores
        comparing `estimator.labels_` with the module-level `labels`; the silhouette
        score uses the module-level `sample_size`.
        """
        started = time()
        estimator.fit(data)
        elapsed = time() - started
        inertia = estimator.inertia_
        found = estimator.labels_
        scores = (
            metrics.homogeneity_score(labels, found),
            metrics.completeness_score(labels, found),
            metrics.v_measure_score(labels, found),
            metrics.adjusted_rand_score(labels, found),
            metrics.adjusted_mutual_info_score(labels, found,
                                               average_method='arithmetic'),
            metrics.silhouette_score(data, found,
                                     metric='euclidean',
                                     sample_size=sample_size),
        )
        print('%-9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f'
              % ((name, elapsed, inertia) + scores))
     
    bench_k_means(KMeans(init='k-means++', n_clusters=n_digits, n_init=10),
                  name="k-means++", data=data)

    bench_k_means(KMeans(init='random', n_clusters=n_digits, n_init=10),
                  name="random", data=data)

    # in this case the seeding of the centers is deterministic, hence we run the
    # kmeans algorithm only once with n_init=1
    pca = PCA(n_components=n_digits).fit(data)
    bench_k_means(KMeans(init=pca.components_, n_clusters=n_digits, n_init=1),
                  name="PCA-based",
                  data=data)
    print(82 * '_')

    # #############################################################################
    # Visualize the results on PCA-reduced data

    reduced_data = PCA(n_components=2).fit_transform(data)
    kmeans = KMeans(init='k-means++', n_clusters=n_digits, n_init=10)
    kmeans.fit(reduced_data)

    # Step size of the mesh. Decrease to increase the quality of the VQ.
    h = .02

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Obtain labels for each point in mesh. Use last trained model.
    Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure(1)
    plt.clf()
    plt.imshow(Z, interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired,
               aspect='auto', origin='lower')

    plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)
    # Plot the centroids as a white X
    centroids = kmeans.cluster_centers_
    plt.scatter(centroids[:, 0], centroids[:, 1],
                marker='x', s=169, linewidths=3,
                color='w', zorder=10)
    plt.title('K-means clustering on the digits dataset (PCA-reduced data) '
              'Centroids are marked with white cross')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.xticks(())
    plt.yticks(())
    plt.show()

     决策树可视化

    1. 安装绘图软件 GraphViz(下载 graphviz-2.38.zip 并解压),然后将解压目录下的 bin 路径添加到 PATH 环境变量。通过“我的电脑”修改系统环境变量似乎不生效,因此下面直接在代码中设置。

    1
    2
    3
    4
    5
    6
    # Add the GraphViz binaries to PATH for the current process only.
    import os
    os.environ["PATH"] += os.pathsep + 'D:/graphviz-2.38/release/bin/'

    # Install the required packages. This is a shell command, not Python:
    # run it in a terminal, or as "%pip install graphviz pydotplus" in a notebook.
    #   pip install graphviz pydotplus

    2. 绘制决策树

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    #import io
    #import graphviz
    import pydotplus
    from sklearn.datasets import load_iris
    from sklearn import tree
    from IPython.display import Image
     
    # Fit a decision tree on the iris dataset.
    iris = load_iris()
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(iris.data, iris.target)
    #tree.plot_tree(clf.fit(iris.data, iris.target))

    # Export the fitted tree as DOT source (out_file=None returns a string).
    #dot_data = tree.export_graphviz(clf, out_file=None)  # black and white
    dot_data = tree.export_graphviz(clf, out_file=None,
                          feature_names=iris.feature_names,
                          class_names=iris.target_names,
                          filled=True, rounded=True,
                          special_characters=True)
    # Alternative: render with the graphviz package instead of pydotplus.
    #dot_data = io.StringIO()
    #tree.export_graphviz(clf, out_file=dot_data)
    #graph = graphviz.Source(dot_data)
    #graph.render("iris")  # export as iris.pdf
    #graph

    # Turn the DOT source into a PNG and display it inline (notebook).
    graph = pydotplus.graphviz.graph_from_dot_data(dot_data)
    Image(graph.create_png())
     
    # ---------------------------------------------------
    #from numpy import loadtxt
    from sklearn.datasets import load_iris
    from xgboost import XGBClassifier
    from xgboost import plot_tree
    import matplotlib.pyplot as plt
    # load data
    #iris = loadtxt('pima-indians-diabetes.csv', delimiter=",")
    iris = load_iris()
    # split data into X and y
    X = iris.data
    y = iris.target
    # fit model on training data
    model = XGBClassifier()
    model.fit(X, y)
    # plot single tree (index 4 of the boosted ensemble) on a high-DPI figure
    fig = plt.figure(dpi=180)
    ax = plt.subplot(1,1,1)
    plot_tree(model, num_trees=4, ax = ax)
    plt.show()

     

    时间序列数据可视化

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    #import seaborn as sns
    from datetime import datetime
    from statsmodels.tsa.seasonal import seasonal_decompose
    from statsmodels.tsa.stattools import adfuller
    #import statsmodels.api as sm
    #import statsmodels.formula.api as smf
    #import statsmodels.tsa.api as smt
    #sm.graphics.tsa.plot_acf
    from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
    from matplotlib.ticker import MultipleLocator, FormatStrFormatter
     
    def plot_acf_pacf(y, lags=12):
        """Plot a series and its lag-differenced version with ACF and PACF.

        Draws a 3x2 grid of axes. Column 0 shows ``y``; column 1 shows
        ``y.diff(lags).dropna()``. In each column the rows are, top to
        bottom: the series itself, its autocorrelation plot and its partial
        autocorrelation plot. The figure is shown with ``plt.show()``.

        Args:
            y: Series to analyse. It must provide ``.plot(ax=...)`` and
                ``.diff(...)`` (presumably a pandas Series -- confirm with
                callers).
            lags: Period passed to ``y.diff`` for the second column.

        Returns:
            None.
        """
        plt.figure(figsize=(14, 8))
        # 3 rows (series, ACF, PACF) x 2 columns (original, differenced).
        layout = (3, 2)

        def tsplot(series, layout, col, plotlags=20, title=''):
            """Fill one column of the grid with series, ACF and PACF plots."""
            ts_ax = plt.subplot2grid(layout, (0, col))
            acf_ax = plt.subplot2grid(layout, (1, col))
            pacf_ax = plt.subplot2grid(layout, (2, col))

            series.plot(ax=ts_ax)
            ts_ax.set_title(title)

            # Label every second lag on the correlation plots.
            lag_ticks = list(range(0, plotlags + 1, 2))

            plot_acf(series, lags=plotlags, ax=acf_ax)
            # Optional reference lines, e.g.:
            #acf_ax.axhline(y=0.1, ls="--", c="r")   # horizontal line
            #acf_ax.axhline(y=-0.1, ls="--", c="r")  # horizontal line
            #plt.axvline(x=4, ls="-", c="green")     # vertical line
            acf_ax.set_xticks(lag_ticks)

            plot_pacf(series, lags=plotlags, ax=pacf_ax)
            #pacf_ax.axhline(y=0.1, ls="--", c="r")   # horizontal line
            #pacf_ax.axhline(y=-0.1, ls="--", c="r")  # horizontal line
            pacf_ax.set_xticks(lag_ticks)

        tsplot(y, layout, 0, plotlags=20, title='Original Series')
        # NOTE(review): y.diff(lags) is a single difference at lag `lags`, so
        # the title wording ("12st Order") looks inaccurate; the string is
        # kept unchanged here.
        tsplot(y.diff(lags).dropna(), layout, 1, plotlags=20, title='%sst Order Differencing'%(lags))
        plt.tight_layout()
        plt.show()
     
     
    # Example calls; income2 and payment2 are not defined in this snippet --
    # presumably time series prepared elsewhere (TODO confirm).
    plot_acf_pacf(income2, lags=12)
    plot_acf_pacf(payment2, lags=12)

      

  • 相关阅读:
    python全栈开发day20-类的三大特性继承、多态、封装
    python全栈开发day19-面向对象初识
    python全栈开发day21-2 几个装饰器总结
    python全栈开发day16-正则表达式和re模块
    python全栈开发day15-递归函数、二分查找
    python运算符优先级
    动手动脑4
    动手动脑3
    查询对象个数
    动手动脑2
  • 原文地址:https://www.cnblogs.com/GaoAnLee/p/14914600.html
Copyright © 2011-2022 走看看