zoukankan      html  css  js  c++  java
  • 小马哥课堂-统计学-置信区间(2)

     1 #!/usr/bin/env python3
     2 #-*- coding:utf-8 -*-
     3 #############################################
     4 #File Name: ci_ex1.py
     5 #Brief: 示例1
     6 #Author: frank
     7 #Email: frank0903@aliyun.com
     8 #Created Time:2018-08-13 19:33:11
     9 #Blog: http://www.cnblogs.com/black-mamba
    10 #Github: https://github.com/xiaomagejunfu0903/statistic_notes
    11 #############################################
    12 import matplotlib.pyplot as plt
    13 import numpy as np
    14 import scipy.stats
    15 import random
    16 
    17 #随机抽取36个苹果,计算200,000苹果的重量在100到124g范围的概率
    18 def sampling_distribution(data,confidence=0.95,sampling_times=1,sampling_size=30):
    19     list_sampling_mean = []
    20     
    21     for i in np.arange(sampling_times):
    22         #samples = np.random.choice(data,sampling_size)#重复抽样
    23         samples = random.sample(list(data),sampling_size)#不重复抽样
    24         #print("samples:{}".format(samples))
    25         samples_mean = np.mean(samples)
    26         list_sampling_mean.append(samples_mean)
    27         
    28     
    29     #the sampling distribution of the sampling mean,样本均值的抽样分布
    30     sam_mean = np.mean(list_sampling_mean)
    31     print("样本均值分布的期望,sam_mean:{}".format(sam_mean))
    32     sam_std = np.std(list_sampling_mean)
    33     print("样本均值分布的标准差,sam_std:{}".format(sam_std))
    34     
    35     #获取样本均值分布~N(sam_mean, sam_std^2)
    36     norm = scipy.stats.norm(loc=sam_mean, scale=sam_std)
    37     
    38     #获取距离sam_mean 4个sam_std的范围
    39     x = np.arange(sam_mean-sam_std*4, sam_mean+sam_std*4, 1)
    40     #获取x对应的概率密度函数值
    41     y = norm.pdf(x)
    42     #显示样本均值分布
    43     plt.plot(x, y,'b--',alpha=0.7,label='sample distribution of the sample mean')
    44 
    45     #获取置信区间,方法1
    46     confidence *= 100
    47     c_low = (100 - confidence) / 2
    48     c_high = 100 - c_low
    49     c_interval = np.percentile(list_sampling_mean,[c_low, c_high])
    50     print("95%置信区间端点对应的x坐标:{}".format(c_interval))
    51     
    52 
    53     #获取置信区间,方法2
    54     c_interval1 = [norm.isf(1-0.025),norm.isf(1-0.975)]
    55     print("95%置信区间端点对应的x坐标:{}".format(c_interval1))
    56     print("[{},{}]".format(sam_mean-sam_std*1.8, sam_mean+sam_std*1.8))
    57 
    58     #绘制置信区间对应的竖线
    59     a = c_interval[0]
    60     b = c_interval[1]
    61     plt.vlines(a, 0, norm.pdf(a),'r')
    62     plt.vlines(b, 0, norm.pdf(b),'r')
    63     
    64     a1 = c_interval1[0]
    65     b1 = c_interval1[1]
    66     plt.vlines(a1, 0, norm.pdf(a1),'g',alpha=0.5)
    67     plt.vlines(b1, 0, norm.pdf(b1),'g',alpha=0.5)
    68     
    69     fill_x = np.linspace(a1,b1,100)
    70     #print("type of fill_x:{}".format(type(fill_x)))
    71     fill_y = norm.pdf(fill_x)
    72     #print("type of fill_y:{}".format(type(fill_y)))
    73     plt.fill_between(fill_x, fill_y, color='b',alpha=0.5)
    74 
    75 
    76 #模拟总体200,000个苹果的重量数据
    77 #用不同的数据测试
    78 #data1 = np.random.randint(10, 200, size=200000)
    79 data1 = np.random.randint(50, 200, size=200000)
    80 
    81 #总体均值
    82 population_mean = np.mean(data1)
    83 print("population mean:{}".format(population_mean))
    84 
    85 #总体标准差
    86 population_std = np.std(data1)
    87 print("population std:{}".format(population_std))
    88 
    89 #总体分布曲线
    90 norm1 = scipy.stats.norm(population_mean, population_std)
    91 x = np.arange(population_mean-population_std*3.5, population_mean+population_std*3.5, 1)
    92 y = norm1.pdf(x)
    93 plt.plot(x, y,'r--',label='population distribution',alpha=0.7)
    94 
    95 
    96 sampling_distribution(data1,sampling_times=1000,sampling_size=36) 
    97 
    98 plt.legend()
    99 plt.show() 
    ci_ex1.py 演示了小马哥课堂-统计学-置信区间 的示例1

     1 #!/usr/bin/env python3
     2 #-*- coding:utf-8 -*-
     3 #############################################
     4 #File Name: ci_ex2.py
     5 #Brief: 演示示例2
     6 #Author: frank
     7 #Email: frank0903@aliyun.com
     8 #Created Time:2018-08-13 20:14:44
     9 #Blog: http://www.cnblogs.com/black-mamba
    10 #Github: https://github.com/xiaomagejunfu0903/statistic_notes
    11 #############################################
    12 import numpy as np
    13 import matplotlib.pyplot as plt
    14 import matplotlib as mpl
    15 import scipy.stats
    16 
    17 #设置中文字体
    18 zhfont = mpl.font_manager.FontProperties(fname='/usr/share/fonts/truetype/wqy/wqy-microhei.ttc')
    19 
    20 #95%置信水平的标准正态分布的置信区间演示
    21 norm = scipy.stats.norm()
    22 #x轴
    23 x = np.linspace(norm.ppf(0.001),norm.ppf(0.999), 10000)
    24 #y轴
    25 y= norm.pdf(x)
    26 #显示标准正态分布曲线
    27 plt.plot(x,y)
    28 
    29 #画置信区间的界限
    30 plt.vlines(-1.96,ymin=0,ymax=norm.pdf(-1.96),colors='r', linestyles='dashed')
    31 plt.vlines(1.96,ymin=0,ymax=norm.pdf(1.96),colors='r', linestyles='dashed')
    32 
    33 #显示 置信区间
    34 fill_x = np.linspace(-1.96,1.96,100)
    35 #print("type of fill_x:{}".format(type(fill_x)))
    36 fill_y = norm.pdf(fill_x)
    37 #print("type of fill_y:{}".format(type(fill_y)))
    38 plt.fill_between(fill_x, fill_y, color='b',alpha=0.5)
    39 
    40 #已知置信水平,求置信区间
    41 
    42 #方法1:不太准确
    43 confidence = 95
    44 c_low = (100-confidence)/2
    45 c_high = 100-c_low
    46 c_interval = np.percentile(x,[c_low, c_high])
    47 print(c_interval)
    48 plt.vlines(c_interval[0] ,ymin=0,ymax=0.3,colors='purple', linestyles='dashed')
    49 plt.vlines(c_interval[1] ,ymin=0,ymax=0.3,colors='purple', linestyles='dashed')
    50 
    51 #方法2:更好的方式
    52 c_interval = [norm.isf(1-0.025),norm.isf(1-0.975)]
    53 print(c_interval)
    54 plt.vlines(c_interval[0] ,ymin=0,ymax=norm.pdf(-1.96),colors='r', linestyles='dashed',alpha=0.5)
    55 plt.vlines(c_interval[1] ,ymin=0,ymax=norm.pdf(1.96),colors='r', linestyles='dashed',alpha=0.5)
    56 
    57 plt.title('标准正态分布的95%置信区间',fontproperties=zhfont)
    58 plt.savefig("ci_ex2.jpg")
    59 #print("pdf(1.96):{}".format(norm.pdf(1.96)))
    60 #print("cdf(1.96):{}".format(norm.cdf(1.96)))
    61 #print("sf(1.96):{}".format(norm.sf(1.96)))
    62 #print("isf(0.025):{}".format(norm.isf(0.025)))
    63 plt.show()
    ci_ex2.py 演示了小马哥课堂-统计学-置信区间 的示例2

    
    
  • 相关阅读:
    树:二叉树
    树:红黑树
    gtest
    VDB R&D
    QML 从入门到放弃
    json parse
    Effective C++ 笔记
    Samples topic
    C++ 11 snippets , 2
    C++ 11 snippets , 1
  • 原文地址:https://www.cnblogs.com/black-mamba/p/9472824.html
Copyright © 2011-2022 走看看