zoukankan      html  css  js  c++  java
  • 小马哥课堂-统计学-置信区间(2)

     1 #!/usr/bin/env python3
     2 #-*- coding:utf-8 -*-
     3 #############################################
     4 #File Name: ci_ex1.py
     5 #Brief: 示例1
     6 #Author: frank
     7 #Email: frank0903@aliyun.com
     8 #Created Time:2018-08-13 19:33:11
     9 #Blog: http://www.cnblogs.com/black-mamba
    10 #Github: https://github.com/xiaomagejunfu0903/statistic_notes
    11 #############################################
    12 import matplotlib.pyplot as plt
    13 import numpy as np
    14 import scipy.stats
    15 import random
    16 
    17 #随机抽取36个苹果,计算200,000苹果的重量在100到124g范围的概率
    18 def sampling_distribution(data,confidence=0.95,sampling_times=1,sampling_size=30):
    19     list_sampling_mean = []
    20     
    21     for i in np.arange(sampling_times):
    22         #samples = np.random.choice(data,sampling_size)#重复抽样
    23         samples = random.sample(list(data),sampling_size)#不重复抽样
    24         #print("samples:{}".format(samples))
    25         samples_mean = np.mean(samples)
    26         list_sampling_mean.append(samples_mean)
    27         
    28     
    29     #the sampling distribution of the sampling mean,样本均值的抽样分布
    30     sam_mean = np.mean(list_sampling_mean)
    31     print("样本均值分布的期望,sam_mean:{}".format(sam_mean))
    32     sam_std = np.std(list_sampling_mean)
    33     print("样本均值分布的标准差,sam_std:{}".format(sam_std))
    34     
    35     #获取样本均值分布~N(sam_mean, sam_std^2)
    36     norm = scipy.stats.norm(loc=sam_mean, scale=sam_std)
    37     
    38     #获取距离sam_mean 4个sam_std的范围
    39     x = np.arange(sam_mean-sam_std*4, sam_mean+sam_std*4, 1)
    40     #获取x对应的概率密度函数值
    41     y = norm.pdf(x)
    42     #显示样本均值分布
    43     plt.plot(x, y,'b--',alpha=0.7,label='sample distribution of the sample mean')
    44 
    45     #获取置信区间,方法1
    46     confidence *= 100
    47     c_low = (100 - confidence) / 2
    48     c_high = 100 - c_low
    49     c_interval = np.percentile(list_sampling_mean,[c_low, c_high])
    50     print("95%置信区间端点对应的x坐标:{}".format(c_interval))
    51     
    52 
    53     #获取置信区间,方法2
    54     c_interval1 = [norm.isf(1-0.025),norm.isf(1-0.975)]
    55     print("95%置信区间端点对应的x坐标:{}".format(c_interval1))
    56     print("[{},{}]".format(sam_mean-sam_std*1.8, sam_mean+sam_std*1.8))
    57 
    58     #绘制置信区间对应的竖线
    59     a = c_interval[0]
    60     b = c_interval[1]
    61     plt.vlines(a, 0, norm.pdf(a),'r')
    62     plt.vlines(b, 0, norm.pdf(b),'r')
    63     
    64     a1 = c_interval1[0]
    65     b1 = c_interval1[1]
    66     plt.vlines(a1, 0, norm.pdf(a1),'g',alpha=0.5)
    67     plt.vlines(b1, 0, norm.pdf(b1),'g',alpha=0.5)
    68     
    69     fill_x = np.linspace(a1,b1,100)
    70     #print("type of fill_x:{}".format(type(fill_x)))
    71     fill_y = norm.pdf(fill_x)
    72     #print("type of fill_y:{}".format(type(fill_y)))
    73     plt.fill_between(fill_x, fill_y, color='b',alpha=0.5)
    74 
    75 
    76 #模拟总体200,000个苹果的重量数据
    77 #用不同的数据测试
    78 #data1 = np.random.randint(10, 200, size=200000)
    79 data1 = np.random.randint(50, 200, size=200000)
    80 
    81 #总体均值
    82 population_mean = np.mean(data1)
    83 print("population mean:{}".format(population_mean))
    84 
    85 #总体标准差
    86 population_std = np.std(data1)
    87 print("population std:{}".format(population_std))
    88 
    89 #总体分布曲线
    90 norm1 = scipy.stats.norm(population_mean, population_std)
    91 x = np.arange(population_mean-population_std*3.5, population_mean+population_std*3.5, 1)
    92 y = norm1.pdf(x)
    93 plt.plot(x, y,'r--',label='population distribution',alpha=0.7)
    94 
    95 
    96 sampling_distribution(data1,sampling_times=1000,sampling_size=36) 
    97 
    98 plt.legend()
    99 plt.show() 
    ci_ex1.py 演示了小马哥课堂-统计学-置信区间 的示例1

     1 #!/usr/bin/env python3
     2 #-*- coding:utf-8 -*-
     3 #############################################
     4 #File Name: ci_ex2.py
     5 #Brief: 演示示例2
     6 #Author: frank
     7 #Email: frank0903@aliyun.com
     8 #Created Time:2018-08-13 20:14:44
     9 #Blog: http://www.cnblogs.com/black-mamba
    10 #Github: https://github.com/xiaomagejunfu0903/statistic_notes
    11 #############################################
    12 import numpy as np
    13 import matplotlib.pyplot as plt
    14 import matplotlib as mpl
    15 import scipy.stats
    16 
    17 #设置中文字体
    18 zhfont = mpl.font_manager.FontProperties(fname='/usr/share/fonts/truetype/wqy/wqy-microhei.ttc')
    19 
    20 #95%置信水平的标准正态分布的置信区间演示
    21 norm = scipy.stats.norm()
    22 #x轴
    23 x = np.linspace(norm.ppf(0.001),norm.ppf(0.999), 10000)
    24 #y轴
    25 y= norm.pdf(x)
    26 #显示标准正态分布曲线
    27 plt.plot(x,y)
    28 
    29 #画置信区间的界限
    30 plt.vlines(-1.96,ymin=0,ymax=norm.pdf(-1.96),colors='r', linestyles='dashed')
    31 plt.vlines(1.96,ymin=0,ymax=norm.pdf(1.96),colors='r', linestyles='dashed')
    32 
    33 #显示 置信区间
    34 fill_x = np.linspace(-1.96,1.96,100)
    35 #print("type of fill_x:{}".format(type(fill_x)))
    36 fill_y = norm.pdf(fill_x)
    37 #print("type of fill_y:{}".format(type(fill_y)))
    38 plt.fill_between(fill_x, fill_y, color='b',alpha=0.5)
    39 
    40 #已知置信水平,求置信区间
    41 
    42 #方法1:不太准确
    43 confidence = 95
    44 c_low = (100-confidence)/2
    45 c_high = 100-c_low
    46 c_interval = np.percentile(x,[c_low, c_high])
    47 print(c_interval)
    48 plt.vlines(c_interval[0] ,ymin=0,ymax=0.3,colors='purple', linestyles='dashed')
    49 plt.vlines(c_interval[1] ,ymin=0,ymax=0.3,colors='purple', linestyles='dashed')
    50 
    51 #方法2:更好的方式
    52 c_interval = [norm.isf(1-0.025),norm.isf(1-0.975)]
    53 print(c_interval)
    54 plt.vlines(c_interval[0] ,ymin=0,ymax=norm.pdf(-1.96),colors='r', linestyles='dashed',alpha=0.5)
    55 plt.vlines(c_interval[1] ,ymin=0,ymax=norm.pdf(1.96),colors='r', linestyles='dashed',alpha=0.5)
    56 
    57 plt.title('标准正态分布的95%置信区间',fontproperties=zhfont)
    58 plt.savefig("ci_ex2.jpg")
    59 #print("pdf(1.96):{}".format(norm.pdf(1.96)))
    60 #print("cdf(1.96):{}".format(norm.cdf(1.96)))
    61 #print("sf(1.96):{}".format(norm.sf(1.96)))
    62 #print("isf(0.025):{}".format(norm.isf(0.025)))
    63 plt.show()
    ci_ex2.py 演示了小马哥课堂-统计学-置信区间 的示例2

    
    
  • 相关阅读:
    hdu 1028 Ignatius and the Princess III (n的划分)
    CodeForces
    poj 3254 Corn Fields (状压DP入门)
    HYSBZ 1040 骑士 (基环外向树DP)
    PAT 1071 Speech Patterns (25)
    PAT 1077 Kuchiguse (20)
    PAT 1043 Is It a Binary Search Tree (25)
    PAT 1053 Path of Equal Weight (30)
    c++ 常用标准库
    常见数学问题
  • 原文地址:https://www.cnblogs.com/black-mamba/p/9472824.html
Copyright © 2011-2022 走看看