zoukankan      html  css  js  c++  java
  • Hadoop下大矩阵乘法Version2

    1)使用本方法计算F*B,其中F是1000*1000的矩阵,B是1000*20000的矩阵。集群由三个节点组成,每个节点一个CPU核(集群装在虚拟机里,
    宿主机只有4个CPU核),每个节点配置一个map槽和一个reduce槽,完成矩阵运算用时约5分钟。
    2)源码如下:
      1 /**
      2  * Created with IntelliJ IDEA.
      3  * User: hadoop
      4  * Date: 16-3-14
      5  * Time: 下午3:13
      6  * To change this template use File | Settings | File Templates.
      7  */
      8 import org.apache.hadoop.conf.Configuration;
      9 import org.apache.hadoop.fs.FileSystem;
     10 import java.io.IOException;
     11 import java.lang.reflect.Array;
     12 import java.net.URI;
     13 import org.apache.hadoop.fs.Path;
     14 import org.apache.hadoop.io.*;
     15 import org.apache.hadoop.io.DoubleWritable;
     16 import org.apache.hadoop.io.Writable;
     17 import org.apache.hadoop.mapreduce.InputSplit;
     18 import org.apache.hadoop.mapreduce.Job;
     19 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
     20 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
     21 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
     22 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
     23 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
     24 import org.apache.hadoop.mapreduce.Reducer;
     25 import org.apache.hadoop.mapreduce.Mapper;
     26 import org.apache.hadoop.filecache.DistributedCache;
     27 import org.apache.hadoop.util.ReflectionUtils;
     28 
     29 public class MutiDoubleInputMatrixProduct {
     30 
     31     public static void initDoubleArrayWritable(int length,DoubleWritable[] doubleArrayWritable){
     32         for (int i=0;i<length;++i){
     33           doubleArrayWritable[i]=new DoubleWritable(0.0);
     34         }
     35     }
     36 
     37     public static  class MyMapper extends Mapper<IntWritable,DoubleArrayWritable,IntWritable,DoubleArrayWritable>{
     38         public DoubleArrayWritable map_value=new DoubleArrayWritable();
     39         public  double[][] leftMatrix=null;/******************************************/
     40         //public Object obValue=null;
     41         public DoubleWritable[] arraySum=null;
     42         public DoubleWritable[] tempColumnArrayDoubleWritable=null;
     43         public DoubleWritable[] tempRowArrayDoubleWritable=null;
     44         public double sum=0;
     45         public double uValue;
     46         public int leftMatrixRowNum;
     47         public int leftMatrixColumnNum;
     48         public void setup(Context context) throws IOException {
     49             Configuration conf=context.getConfiguration();
     50             leftMatrixRowNum=conf.getInt("leftMatrixRowNum",10);
     51             leftMatrixColumnNum=conf.getInt("leftMatrixColumnNum",10);
     52             leftMatrix=new double[leftMatrixRowNum][leftMatrixColumnNum];
     53             uValue=(double)(context.getConfiguration().getFloat("u",1.0f));
     54             tempRowArrayDoubleWritable=new DoubleWritable[leftMatrixColumnNum];
     55             initDoubleArrayWritable(leftMatrixColumnNum,tempRowArrayDoubleWritable);
     56             tempColumnArrayDoubleWritable=new DoubleWritable[leftMatrixRowNum];
     57             initDoubleArrayWritable(leftMatrixRowNum,tempColumnArrayDoubleWritable);
     58             System.out.println("map setup() start!");
     59             //URI[] cacheFiles=DistributedCache.getCacheFiles(context.getConfiguration());
     60             Path[] cacheFiles=DistributedCache.getLocalCacheFiles(conf);
     61             String localCacheFile="file://"+cacheFiles[0].toString();
     62             //URI[] cacheFiles=DistributedCache.getCacheFiles(conf);
     63             //DistributedCache.
     64             System.out.println("local path is:"+cacheFiles[0].toString());
     65             // URI[] cacheFiles=DistributedCache.getCacheFiles(context.getConfiguration());
     66             FileSystem fs =FileSystem.get(URI.create(localCacheFile), conf);
     67             SequenceFile.Reader reader=null;
     68             reader=new SequenceFile.Reader(fs,new Path(localCacheFile),conf);
     69             IntWritable key= (IntWritable)ReflectionUtils.newInstance(reader.getKeyClass(),conf);
     70             DoubleArrayWritable value= (DoubleArrayWritable)ReflectionUtils.newInstance(reader.getValueClass(),conf);
     71             //int valueLength=0;
     72             int rowIndex=0;
     73             int index;
     74             while (reader.next(key,value)){
     75                 index=-1;
     76                 for (Writable val:value.get()){
     77                     tempRowArrayDoubleWritable[++index].set(((DoubleWritable)val).get());
     78                 }
     79                 //obValue=value.toArray();
     80                 rowIndex=key.get();
     81                 leftMatrix[rowIndex]=new double[leftMatrixColumnNum];
     82                 //this.leftMatrix=new double[valueLength][Integer.parseInt(context.getConfiguration().get("leftMatrixColumnNum"))];
     83                 for (int i=0;i<leftMatrixColumnNum;++i){
     84                     //leftMatrix[rowIndex][i]=Double.parseDouble(Array.get(obValue, i).toString());
     85                     //leftMatrix[rowIndex][i]=Array.getDouble(obValue, i);
     86                     leftMatrix[rowIndex][i]= tempRowArrayDoubleWritable[i].get();
     87                 }
     88 
     89             }
     90             arraySum=new DoubleWritable[leftMatrix.length];
     91             initDoubleArrayWritable(leftMatrix.length,arraySum);
     92         }
     93         public void map(IntWritable key,DoubleArrayWritable value,Context context) throws IOException, InterruptedException {
     94             //obValue=value.toArray();
     95             InputSplit inputSplit=context.getInputSplit();
     96             String fileName=((FileSplit)inputSplit).getPath().getName();
     97             if (fileName.startsWith("FB")) {
     98                 context.write(key,value);
     99             }
    100             else{
    101                 int ii=-1;
    102                 for(Writable val:value.get()){
    103                     tempColumnArrayDoubleWritable[++ii].set(((DoubleWritable)val).get());
    104                 }
    105                 //arraySum=new DoubleWritable[this.leftMatrix.length];
    106                 for (int i=0;i<this.leftMatrix.length;++i){
    107                     sum=0;
    108                     for (int j=0;j<this.leftMatrix[0].length;++j){
    109                         //sum+= this.leftMatrix[i][j]*Double.parseDouble(Array.get(obValue,j).toString())*(double)(context.getConfiguration().getFloat("u",1f));
    110                         //sum+= this.leftMatrix[i][j]*Array.getDouble(obValue,j)*uValue;
    111                         sum+= this.leftMatrix[i][j]*tempColumnArrayDoubleWritable[j].get()*uValue;
    112                     }
    113                     arraySum[i].set(sum);
    114                     //arraySum[i].set(sum);
    115                 }
    116                 map_value.set(arraySum);
    117                 context.write(key,map_value);
    118             }
    119         }
    120     }
    121     public static class MyReducer extends Reducer<IntWritable,DoubleArrayWritable,IntWritable,DoubleArrayWritable>{
    122         public DoubleWritable[] sum=null;
    123        // public Object obValue=null;
    124         public DoubleArrayWritable valueArrayWritable=new DoubleArrayWritable();
    125         public DoubleWritable[] tempColumnArrayDoubleWritable=null;
    126        // public DoubleWritable[] tempRowArrayDoubleWritable=null;
    127         //private int leftMatrixColumnNum;
    128         private int leftMatrixRowNum;
    129 
    130         public void setup(Context context){
    131             //leftMatrixColumnNum=context.getConfiguration().getInt("leftMatrixColumnNum",100);
    132             leftMatrixRowNum=context.getConfiguration().getInt("leftMatrixRowNum",100);
    133             sum=new DoubleWritable[leftMatrixRowNum];
    134             initDoubleArrayWritable(leftMatrixRowNum,sum);
    135             //tempRowArrayDoubleWritable=new DoubleWritable[leftMatrixColumnNum];
    136             tempColumnArrayDoubleWritable=new DoubleWritable[leftMatrixRowNum];
    137             initDoubleArrayWritable(leftMatrixRowNum,tempColumnArrayDoubleWritable);
    138         }
    139 
    140         public void reduce(IntWritable key,Iterable<DoubleArrayWritable>value,Context context) throws IOException, InterruptedException {
    141             //int valueLength=0;
    142             for(DoubleArrayWritable doubleValue:value){
    143                 int index=-1;
    144                 for (Writable val:doubleValue.get()){
    145                     tempColumnArrayDoubleWritable[++index].set(((DoubleWritable)val).get());
    146                  }
    147                 //valueLength=Array.getLength(obValue);
    148                 for (int i=0;i<leftMatrixRowNum;++i){
    149                     //sum[i]=new DoubleWritable(Double.parseDouble(Array.get(obValue,i).toString())+sum[i].get());
    150                     //sum[i]=new DoubleWritable(Array.getDouble(obValue,i)+sum[i].get());
    151                     sum[i].set(tempColumnArrayDoubleWritable[i].get()+sum[i].get());
    152                 }
    153             }
    154             //valueArrayWritable.set(sum);
    155             valueArrayWritable.set(tempColumnArrayDoubleWritable);
    156             context.write(key,valueArrayWritable);
    157             for (int i=0;i<sum.length;++i){
    158                 sum[i].set(0.0);
    159             }
    160 
    161         }
    162     }
    163 
    164     public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    165         String uri=args[3];
    166         String outUri=args[4];
    167         String cachePath=args[2];
    168         HDFSOperator.deleteDir(outUri);
    169         Configuration conf=new Configuration();
    170         DistributedCache.addCacheFile(URI.create(cachePath),conf);//添加分布式缓存
    171         /**************************************************/
    172         //FileSystem fs=FileSystem.get(URI.create(uri),conf);
    173         //fs.delete(new Path(outUri),true);
    174         /*********************************************************/
    175         conf.setInt("leftMatrixColumnNum",Integer.parseInt(args[0]));
    176         conf.setInt("leftMatrixRowNum",Integer.parseInt(args[1]));
    177         conf.setFloat("u",0.35f);
    178         conf.set("mapred.jar","MutiDoubleInputMatrixProduct.jar");
    179         Job job=new Job(conf,"MatrixProdcut");
    180         job.setJarByClass(MutiDoubleInputMatrixProduct.class);
    181         job.setInputFormatClass(SequenceFileInputFormat.class);
    182         job.setOutputFormatClass(SequenceFileOutputFormat.class);
    183         job.setMapperClass(MyMapper.class);
    184         job.setReducerClass(MyReducer.class);
    185         job.setMapOutputKeyClass(IntWritable.class);
    186         job.setMapOutputValueClass(DoubleArrayWritable.class);
    187         job.setOutputKeyClass(IntWritable.class);
    188         job.setOutputValueClass(DoubleArrayWritable.class);
    189         FileInputFormat.setInputPaths(job, new Path(uri));
    190         FileOutputFormat.setOutputPath(job,new Path(outUri));
    191         System.exit(job.waitForCompletion(true)?0:1);
    192     }
    193 
    194 
    195 }
    196 class DoubleArrayWritable extends ArrayWritable {
    197     public DoubleArrayWritable(){
    198         super(DoubleWritable.class);
    199     }
    200 /*
    201     public String toString(){
    202         StringBuilder sb=new StringBuilder();
    203         for (Writable val:get()){
    204             DoubleWritable doubleWritable=(DoubleWritable)val;
    205             sb.append(doubleWritable.get());
    206             sb.append(",");
    207         }
    208         sb.deleteCharAt(sb.length()-1);
    209         return sb.toString();
    210     }
    211 */
    212 }
    213 
    214 class HDFSOperator{
    215     public static boolean deleteDir(String dir)throws IOException{
    216         Configuration conf=new Configuration();
    217         FileSystem fs =FileSystem.get(conf);
    218         boolean result=fs.delete(new Path(dir),true);
    219         System.out.println("sOutput delete");
    220         fs.close();
    221         return result;
    222     }
    223 }
     
  • 相关阅读:
    python之路(三)-深浅拷贝
    Python之路(一)-python简介
    Web端裁剪图片方法
    如何将github上源代码导入eclipse中
    转 GitHub上史上最全的Android开源项目分类汇总
    转 GitHub上最火的40个Android开源项目(二)
    转 GitHub上最火的40个Android开源项目(一)
    转 GitHub上最火的74个Android开源项目(三)
    CSS实现文本溢出的部分用省略号代替的方法
    时尚且健壮: 实现更优秀的CSS
  • 原文地址:https://www.cnblogs.com/lz3018/p/5279460.html
Copyright © 2011-2022 走看看