zoukankan      html  css  js  c++  java
  • Hadoop实例之Java代码实现利用MapReduce求π值

    需求:假如有一个边长为1的正方形。以正方形的一个顶点为圆心,以1为半径画一段圆弧,于是在正方形内就得到了一个四分之一圆(直角扇形)。在正方形内随机生成若干个点,其中一部分点落在扇形内,另一部分落在扇形外。正方形的面积是1,扇形的面积是0.25*Pi。设点的总数为n,落在扇形内的点数为nc,当点足够多、足够密集时,nc/n近似等于扇形面积与正方形面积之比,也就是nc/n ≈ 0.25*Pi/1,即Pi ≈ 4*nc/n

    首先是生成样本点的问题。Halton序列是一种确定性的低差异(拟随机)序列,并不是真正的随机数;用它生成的样本点分布十分均匀,计算精度较高,效果比较好。

    下面是网上找到的一段利用Halton序列算法生成样本点的代码:

    public class Pi {
        static int digit = 40;
        private int[] bases= new int[2];
        private double[] baseDigit = new double[2];
        private double[][] background = new double[2][digit];
        private long index;
        
        Pi(int[] base) {
            bases = base.clone();
            index = 0;
     
            for(int i=0; i<bases.length; i++) {
                double b = 1.0/bases[i];
                baseDigit[i] = b;
                for(int j=0; j<digit; j++) {
                    background[i][j] = j == 0 ? b : background[i][j-1]*b;
                }
            }
        }
        
        double[] getNext() {
            index++;
            
            double[] result = {0,0};
     
            for(int i=0; i<bases.length; i++) {
                long num = index;
                int j = 0;
                while(num != 0) {
                    result[i] += num % bases[i] * background[i][j++];
                    num /= bases[i];
                }
            }
            
            return result;
        }
        
        public static void main(String[] args) {
            int[] base = {2,5};
            Pi test = new Pi(base);
            for(int x = 0; x < 100; x++){
                double[] t = test.getNext();
                System.out.println(t[0] + "	" + t[1]);
            }
            
        }
    
    
    }

    下面是计算π值的代码:

    package mapreduce;
    
    import java.io.IOException;
    
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.DoubleWritable;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import mapreduce.Pi;//下面生成随机数的时候需要这个类,该类即上面那部分代码
    
    /**
     * 
     * @author sakura
     * 2019.9.3
     * 利用MapReduce计算π值
     *
     */
    public class CalPI {
        public static class PiMapper extends Mapper<Object, Text, Text, IntWritable>{
    
            int number=0; //定义一个变量,用来存放一共生成的点数
            
            //读取文件,每一行都是一个map 本程序读取的文件为十行,每行都是100000
            public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
                int pointNum = Integer.parseInt(value.toString());//将读取到的那一行赋值给pointNum
                number=number+pointNum;//将总点数赋值给number
                 int[] base = {2,5};//生成随机点所用
                Pi test = new Pi(base);//生成随机点所用
                for(int x = 0; x < number; x++){ //循环生成随机点
                    double[] t = test.getNext();//随机生成点,并将坐标存入数组
                    System.out.println(t[0] + "	" + t[1]);//控制台输出随机点的坐标
                    IntWritable result = new IntWritable(0); //定义输出值
                    if((t[0]*t[0]+t[1]*t[1])<=1)//判断生成的点是否在扇形面积内
                    {
                        result = new IntWritable(1);//如果在,将输出值赋值为1
                    }
                    value.set(String.valueOf(number));//定义输出键,输出键为当前生成点的总数
                    context.write(value, result);//写入
                }
            }
        }
    
        public static class PiReducer extends Reducer<Text,IntWritable,Text,DoubleWritable> {
            private DoubleWritable result = new DoubleWritable();//声明输出值
    
            public void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
    
                double pointNum =Double.parseDouble(key.toString());//获取输入的键
                double sum = 0;//定义总数
                for (IntWritable val : values) {//循环从values里取值,累加和赋值给sum
                    sum += val.get();
                }
                result.set(sum/pointNum*4);//将计算得到的π值赋值给result
                
                context.write(key, result);//将键值,即生成点总数,和result,即计算得到的π值作为一个键值对写入context
            }
        }
    
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf,"calculate pi");
            job.setJarByClass(CalPI.class);
            job.setMapperClass(PiMapper.class);
            job.setReducerClass(PiReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(DoubleWritable.class);
    
            Path  in  =  new  Path("hdfs://192.168.68.130:9000/user/hadoop/nai.txt");  //读入文件地址
            Path  out  = new Path("hdfs://192.168.68.130:9000/user/hadoop/output4");  //输出文件地址,output4不能存在
            FileInputFormat.addInputPath(job, in);
            FileOutputFormat.setOutputPath(job, out);
            System.exit(job.waitForCompletion(true) ? 0  :  1);  
      
        }
    
    
    }
  • 相关阅读:
    Python Revisited Day 13 (正则表达式)
    Python Revisited Day 06 (面向对象程序设计)
    Python Revisited (变量)
    Python Revisited Day 05(模块)
    Python Revisited Day 04 (控制结构与函数)
    Python Revisited Day 03 (组合数据类型)
    Numpy
    Python Revisited Day 01
    Python3使用openpyxl读写Excel文件
    Python3操作YAML文件
  • 原文地址:https://www.cnblogs.com/sakura--/p/11455467.html
Copyright © 2011-2022 走看看