zoukankan      html  css  js  c++  java
  • 已知分布的抽样

    本文主要是根据MC随机抽样思想,进行已知分布的抽样,对于数据分析有用,主要做如下几个版本

    • C++
    • MATLAB
    • C#
    • PYTHON
    • C

    C++版本的主要代码为

     (1)数据部分,概率密度分布

    const double energy[210]={21.000000, 22.000000, 23.000000, 24.000000, 25.000000, 26.000000, 27.000000, 28.000000, 29.000000, 30.000000, 31.000000, 32.000000, 33.000000, 34.000000, 35.000000, 36.000000, 37.000000, 38.000000, 39.000000, 40.000000, 41.000000, 42.000000, 43.000000, 44.000000, 45.000000, 46.000000, 47.000000, 48.000000, 49.000000, 50.000000, 51.000000, 52.000000, 53.000000, 54.000000, 55.000000, 56.000000, 57.000000, 58.000000, 59.000000, 60.000000};
    const double probability[210]={ 0.003172, 0.001603, 0.003484, 0.000000, 0.000365, 0.003666, 0.001642, 0.005879, 0.005202, 0.022721, 0.027685, 0.056204, 0.075032, 0.118366, 0.163304, 0.215736, 0.283005, 0.343148, 0.422711, 0.494946, 0.574868, 0.652534, 0.717833, 0.794975, 0.842019, 0.909445, 0.924232, 0.986227, 0.974940, 1.000000, 0.964995, 0.968823, 0.896359, 0.866523, 0.761573, 0.687310, 0.557857, 0.420584, 0.248464, 0.038391};

    (2)归一化概率密度分布并积分得到分布函数

                for (int x = 1; x < 210; x++)
                {
                    sump += probability[x];
                }
                pp[0] = probability[0];
                for (int x = 1; x < 210; x++)
                { pp[x] = pp[x - 1] + probability[x]; }

    (3)抽样函数

     1         double IncidentGammaEnergy()
     2         {
     3             double rand=G4UniformRand();
     4             double sum=pp[0];
     5             double pre_sum=0.;
     6             int p_size = sizeof(probability)/sizeof(probability[0]);
     7             for(int k=0;k<p_size-1;k++)
     8             {
     9                 if ((rand>pre_sum) && (rand<=sum))
    10                 {
    11                     return energy[k]+(energy[k+1]-energy[k])*(rand-pre_sum)*(sum-pre_sum);
    12                 }
    13                 pre_sum = sum;
    14                 sum=pp[k+1];
    15             }
    16             return energy[p_size-1];
    17         }

    matlab实现的代码为

    clear
    clc
    % data name is cs137.mat,it has sub var x and y load cs137 x=x; y=y; % normalize y=y/sum(y); n=length(x); % fenbuhanshu pp=zeros(n,1); for i=1:n-1 pp(i+1)=pp(i)+y(i+1); end % sample now pre_sum=0; sumup=pp(1); m=100000; rand_number=rand(m,1); out=zeros(m,1); for j=1:m for i=1:n-1 if(rand_number(j)>pre_sum)&&(rand_number(j)<=sumup) out(j)=x(i); end pre_sum=sumup; sumup=pp(i+1); end out=out'; end % plot and compare [histvalue,nbins]=hist(out,1:1:n); histvalue=histvalue'; plot(nbins,histvalue/sum(histvalue)) hold on plot(x,y,'r') hold off legend('抽样结果','原始数据')

    抽样100000次(m=100000)结果为

    m=10000

    m=1000;

    C#实现

    using MathNet.Numerics.LinearAlgebra;
    using MathNet.Numerics.Statistics;
    using System;
    using System.Collections.Generic;
    using System.IO;
    
    namespace array_sample
    {
        internal class Program
        {
            private static void Main(string[] args)
            {
                var spectest = loadspec("spectest");
                var result = IncidentGammaEnergy(spectest, 100000);
                int n = spectest.Length;
                //int n = 200;
                double[] histallll = new double[n];
                var histogram = new Histogram(result, n);
                for (int i = 0; i < histallll.Length; i++)
                {
                    histallll[i] = histogram[i].Count;
                }
    
                //save_data(histallll, "test.txt");
    
                save_data(histallll, "test.txt");
                //foreach (double test in histallll)
                //{
                //    Console.WriteLine(test);
                //}
                //Console.WriteLine(histogram);
            }
    
            private static double[] IncidentGammaEnergy(double[] probability, int cishu)
            {
                double sump = 0;
                double[] pp = new double[probability.Length];
                double[] energy = new double[probability.Length];
    
                for (int x = 0; x < probability.Length; x++)
                { sump += probability[x]; }
                //double pp[210];
                pp[0] = probability[0] / sump;
                energy[0] = 0;
                for (int x = 1; x < probability.Length; x++)
                {
                    pp[x] = pp[x - 1] + probability[x] / sump;
                    energy[x] = x;
                }
    
                double sum = pp[0];
                double pre_sum = 0.0;
                int p_size = probability.Length;
                double[] result = new double[cishu];
                var randoms = GenerateRandom(cishu);
                for (int i = 0; i < cishu; i++)
                {
                    //double rand = new Random().NextDouble();
                    //long tick = DateTime.Now.Ticks;
                    //Random ran = new Random((int)(tick & 0xffffffffL) | (int)(tick >> 32));
                    //double rand = ran.NextDouble();
                    //Console.WriteLine(rand);
                    var rand = randoms[i];
                    //Console.WriteLine(rand);
                    for (int k = 0; k < p_size - 1; k++)
                    {
                        if ((rand > pre_sum) && (rand <= sum))
                        {
                            result[i] = energy[k];// +(energy[k + 1] - energy[k]) * (rand - pre_sum) * (sum - pre_sum);
                        }
                        pre_sum = sum;
                        sum = pp[k + 1];
                    }
                    //result[i] = energy[p_size - 1];
                }
                return result;
            }
    
            private static List<double> GenerateRandom(int iNum)
            {
                long lTick = DateTime.Now.Ticks;
                List<double> lstRet = new List<double>();
                for (int i = 0; i < iNum; i++)
                {
                    Random ran = new Random((int)lTick * i);
                    double iTmp = ran.NextDouble();
                    lstRet.Add(iTmp);
                    lTick += (new Random((int)lTick).Next(978));
                }
                return lstRet;
            }
    
            private static double[] loadspec(string specname)
            {
                var M = Matrix<double>.Build;
                var V = Vector<double>.Build;
    
                string[] content = File.ReadAllLines(@specname);
                double[] yvalue = new double[content.Length];
                //double[] axesx = new double[content.Length];
    
                for (int i = 1; i < content.Length; i++)
                {
                    //axesx[i] = i;
                    yvalue[i] = Convert.ToDouble(content[i]);
                }
                return yvalue;
            }
    
            private static void save_data(double[] content, string str)
            {
                FileStream fs = new FileStream(@str, FileMode.Create);
                StreamWriter sw = new StreamWriter(fs);
    
                for (int i = 1; i < content.Length; i++)
                {
                    sw.WriteLine(Convert.ToDouble(content[i]));//Convert.ToString(hist)
                }
                sw.Close();
                sw.Dispose();
                sw = null;
            }
    
            private static void save_data(Histogram content, string str)
            {
                FileStream fs = new FileStream(@str, FileMode.Create);
                StreamWriter sw = new StreamWriter(fs);
    
                sw.Write(content);//Convert.ToString(hist)
    
                sw.Close();
                sw.Dispose();
                sw = null;
            }
        }
    }
    

      其中遇到的主要问题是随机数的问题,跟MATLAB不同,他生成的随机数有很多重复,这是种子决定的,也就是时间变化赶不上生成的速度,于是查找资料,参考改写了种子,而且只能提前生成随机数。没解决的问题是histgram函数后面指定lower bound 和upper bound都有问题,所以只能让他自动确定了。其实可以自己写一个hist函数,参考引用4或5即可。

    2016-12-30 01:24:05

    明天写一下python实现,还没安装python,囧~突然发现有安装winpython,只是以前安装的,升级WIN10的1601版本,由于是全新升级,导致各种path路径都没了。。。

    python实现

     基本思路是使用random.choice函数。首先将数据读入,简单点先做数组好了,复杂点就读取文件,将数组中元素采用*n的形式搞定,n指的是纵坐标,需要为整数。直接choice函数抽样,采用循环,设定不同次数,然后hist结果,最后画图。具体实现如下

    按照以上思路果然实现了(2016-12-31,03:18:05)

    # -*- coding: utf-8 -*-
    import numpy as np
    import random as rd
    import matplotlib.pyplot as plt
    import matplotlib
    zhfont1 = matplotlib.font_manager.FontProperties(fname='C:WindowsFontssimsun.ttc')
    
    #load data from file
    bb=np.loadtxt('spectest')
    cc=bb.copy()
    n=len(bb)
    aa=range(n)
    
    #sum all yvalue
    test=0
    for k in range(len(bb)):
        test += bb[k]
    
    #creat an array with yvalues' xvalue
    out = [0]*test
    j=0
    for k in range(len(aa)):
        while bb[k]!=0:
            out[j]= aa[k]
            bb[k]=bb[k]-1
            j=j+1
    
    #sample
    cishu=100000
    count=0
    result=[0]*cishu
    while count<cishu:
        result[count]=rd.choice(out)
        count=count+1
    
    #hist it
    histout=np.histogram(result, bins=range(1025))
    #save data
    np.savetxt('result.txt',histout[0])
    np.savetxt('cc.txt',cc)
        
    #plot data
    fig = plt.figure(1)
    x = range(1024)
    plt.plot(x,histout[0]/cishu,'m:',label="sampled data")
    plt.plot(x,cc/test,'k-',label="raw data")
    plt.xlabel('道址',fontproperties=zhfont1)
    plt.ylabel('计数',fontproperties=zhfont1)
    plt.legend(loc = 'upper left')
    plt.show()
    fig.savefig("test.png")
    

      

    中间遇到一些波折,这是python自己写的第一个程序吧,,,其中的一个波折是数组长度搞错了,test应该用第一列的纵坐标之和不小心用成了第三列x.*y之和,如下图

    检查了下,发现c#中写入文件后没有fs.close,特此标记下。

    C的将来有时间再说了,或者谁有兴趣给做个也好啊,C还是不够熟悉哦,虽然大学只是学了C。。。

    参考文献:

    1、许淑艳老师的《蒙特卡罗方法在实验核物理中的应用》的第三章《由已知分布的随机抽样》

    2、C#产生一组不重复随机数的两种方法

    3、MATH.NET

    4、msdn的Random.NextDouble Method ()

    5、JasenKin的《算法--区间数据计算》

  • 相关阅读:
    浅谈XXE漏洞攻击与防御——本质上就是注入,盗取数据用
    Linux pwn入门教程——CTF比赛
    IDA 逆向工程 反汇编使用
    使用virustotal VT 查询情报——感觉远远没有微步、思科好用,10万条数据查出来5万条都有postives >0的记录,尼玛!!!
    使用VAE、CNN encoder+孤立森林检测ssl加密异常流的初探——真是一个忧伤的故事!!!
    优步每周结算时间:每周二下午4点!
    成都Uber优步司机奖励政策(3月30日)
    北京Uber优步司机奖励政策(3月30日)
    滴滴快车奖励政策,高峰奖励,翻倍奖励,按成交率,指派单数分级(3月30日)
    成都Uber优步司机奖励政策(3月29日)
  • 原文地址:https://www.cnblogs.com/liq07lzucn/p/6231043.html
Copyright © 2011-2022 走看看