zoukankan      html  css  js  c++  java
  • java 写一个 map reduce 矩阵相乘的案例

    1、写一个工具类用来生成 map reduce 实验 所需 input 文件

    下面两个是原始文件

    matrix1.txt

    1 2 -2 0
    3 3 4 -3
    -2 0 2 3
    5 3 -1 2
    -4 2 0 2
    

    matrix2.txt

    0 3 -1 2 -3
    1 3 5 -2 -1
    0 1 4 -1 2
    -2 2 -1 1 2
    
    package com.ghc.hadoop;
    
    import java.io.*;
    
    /**
     * Helper that converts a plain-text matrix (one row per line, values separated
     * by whitespace) into the indexed format consumed by the MapReduce jobs:
     * {@code row<TAB>1_v1,2_v2,...} with 1-based row and column numbers.
     */
    public class Utils {
        /**
         * Generates both indexed matrix files (left and right matrix) from the raw inputs.
         *
         * @param args unused
         */
        public static void main(String[] args){
            generateMatrixFromFile("inputs/matrix1.txt","outputs/outmatrix1.txt");
            generateMatrixFromFile("inputs/matrix2.txt","outputs/outmatrix2.txt");
        }

        /**
         * Reads the matrix stored in {@code src} and writes it to {@code target} in
         * indexed form, one output line per matrix row.
         *
         * <p>Blank lines are skipped and do not consume a row number; values may be
         * separated by any run of whitespace. I/O failures are reported on stderr
         * and are not propagated to the caller.
         *
         * @param src    path of the raw matrix file
         * @param target path of the indexed file to create (overwritten if present)
         */
        public static void generateMatrixFromFile(String src,String target){
            // try-with-resources closes the writer first, then the reader, even on failure.
            try (BufferedReader reader = new BufferedReader(new FileReader(src));
                 BufferedWriter writer = new BufferedWriter(new FileWriter(target))) {
                String line;
                int row = 1;
                while ((line = reader.readLine()) != null) {
                    String trimmed = line.trim();
                    if (trimmed.isEmpty()) {
                        // A blank line would otherwise yield a value-less entry such as "1_".
                        continue;
                    }
                    String[] columns = trimmed.split("\\s+");
                    StringBuilder sb = new StringBuilder();
                    sb.append(row).append('\t');
                    for (int i = 0; i < columns.length; i++) {
                        if (i > 0) {
                            sb.append(',');
                        }
                        sb.append(i + 1).append('_').append(columns[i]);
                    }
                    writer.write(sb.toString());
                    writer.newLine();
                    row++;
                }
            } catch (IOException e) {
                // FileNotFoundException is an IOException, so a missing source lands here too.
                e.printStackTrace();
            }
        }
    }
    
    

    利用上面的工具类生成 outmatrix1.txt , outmatrix2.txt 两个文本文件

    下面是利用工具类产生的两个文件,会用作 后面 map reduce 的 输入文件

    outmatrix1.txt

    1	1_1,2_2,3_-2,4_0
    2	1_3,2_3,3_4,4_-3
    3	1_-2,2_0,3_2,4_3
    4	1_5,2_3,3_-1,4_2
    5	1_-4,2_2,3_0,4_2
    
    

    outmatrix2.txt

    1	1_0,2_3,3_-1,4_2,5_-3
    2	1_1,2_3,3_5,4_-2,5_-1
    3	1_0,2_1,3_4,4_-1,5_2
    4	1_-2,2_2,3_-1,4_1,5_2
    
    

    这里稍微介绍下矩阵相乘的原理:左矩阵的每一行与右矩阵的每一列对应元素相乘再求和,得到结果矩阵中的一个元素(要求左矩阵的列数等于右矩阵的行数),如上面的两个矩阵

    左矩阵 shape : 5x4 , 右矩阵 shape: 4x5 ===> out-> shape : 5x5 这里如果有不理解的地方请找到大学的线性代数老师

    下面将要对右矩阵(也就是 outmatrix2.txt 里的矩阵)进行转置,也就是行列互换,这样右矩阵原来的每一列就变成了一行,便于后续与左矩阵的行逐行相乘。因为可能会有多个 map reduce 作业,所以注意下命名:Mapper1、Reducer1 等等

    package com.ghc.hadoop.mapper;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    /**
     * Mapper of the matrix-transpose job: re-keys every matrix entry by its column number.
     *
     * <p>Each input value is one text line of the form
     * {@code row<TAB>col_value,col_value,...}, for example
     * {@code 1<TAB>1_0,2_3,3_-1,4_2,5_-3}. For every {@code col_value} entry the
     * mapper emits key {@code col} and value {@code row_value}. The input key is not read.
     */
    public class Mapper1 extends Mapper<LongWritable,Text,Text,Text> {
        // Output holders, overwritten before every context.write call.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Splits one matrix row into its entries and emits each entry keyed by column.
         *
         * @param key     input key supplied by the framework; unused
         * @param value   one line {@code row<TAB>col_value,...}
         * @param context sink for the {@code (column, row_value)} pairs
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String record = value.toString();
            if (record.trim().isEmpty()) {
                // A blank line has no tab-separated payload; indexing rowAndLine[1]
                // would throw ArrayIndexOutOfBoundsException and fail the task.
                return;
            }
            String[] rowAndLine = record.split("\t");
            // row is the row number of this line
            String row = rowAndLine[0];
            String lines = rowAndLine[1];
            // e.g. ["1_0","2_3","3_-1","4_2","5_-3"]
            String[] columns = lines.split(",");
            for (String entry : columns) {
                String[] columnAndValue = entry.split("_");
                String column = columnAndValue[0];
                String valueStr = columnAndValue[1];
                // key: column number, value: rowNumber_value
                outKey.set(column);
                outValue.set(row + "_" + valueStr);
                context.write(outKey, outValue);
            }
        }
    }
    
    

    reduce 操作

    package com.ghc.hadoop.reducer;
    
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    /**
     * Reducer of the matrix-transpose job: gathers all {@code row_value} entries that
     * share one column number and writes them as a single comma-separated line,
     * keyed by that column number.
     */
    public class Reducer1 extends Reducer<Text,Text,Text,Text>{
        // Output holders, overwritten before the context.write call.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Joins the values of one key with commas and emits the result.
         *
         * @param key     column number of the original matrix
         * @param values  entries of the form {@code row_value} belonging to that column
         * @param context sink for the {@code (column, "row_value,row_value,...")} pair
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            boolean first = true;
            for (Text text : values) {
                // Put the separator in front of every entry but the first, so no trailing
                // comma is produced. The previous substring(0, sb.length()) call returned
                // the whole string and never removed it.
                if (!first) {
                    sb.append(',');
                }
                sb.append(text.toString());
                first = false;
            }

            outKey.set(key);
            outValue.set(sb.toString());
            context.write(outKey, outValue);
        }
    }
    
    

    推荐算法

    待续。。。

    如果有来生,一个人去远行,看不同的风景,感受生命的活力。。。
  • 相关阅读:
    如何很好的使用Linq的Distinct方法
    根据字符串获取对应类型(Type) 转
    .Net 读取xml
    认识ASP.NET MVC的5种AuthorizationFilter
    使用admin插入数据失败
    乱序批量精确修改文件名
    多进程+协程方案处理高IO密集,提升爬取效率
    Linux 安装 CMake
    Ubuntu 截图工具deepin-screenshot添加使用
    Linux virtualenv .bashrc配置文件
  • 原文地址:https://www.cnblogs.com/Frank99/p/9948024.html
Copyright © 2011-2022 走看看